0f8fb7ac60da6c44debb07c1d3c34ec69f3409a1
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36
37 #include "radeon_debug.h"
38 #include "r600_context.h"
39
40 #include "r700_assembler.h"
41 #include "evergreen_sq.h"
42
43 #define USE_CF_FOR_CONTINUE_BREAK 1
44 #define USE_CF_FOR_POP_AFTER 1
45
46 struct prog_instruction noise1_insts[12] = {
47 {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
52 {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
53 {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
58 {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
59 };
/*
 * Constant table referenced by noise1_presub.  Only the first row carries
 * values; the second row is zero (written out here, implicit before).
 */
float noise1_const[2][4] = {
    { 0.300000f, 0.900000f, 0.500000f, 0.300000f },
    { 0.0f,      0.0f,      0.0f,      0.0f      }
};
63
64 COMPILED_SUB noise1_presub = {
65 &(noise1_insts[0]),
66 12,
67 2,
68 1,
69 0,
70 &(noise1_const[0]),
71 SWIZZLE_X,
72 SWIZZLE_X,
73 SWIZZLE_X,
74 SWIZZLE_X,
75 {0,0,0},
76 0
77 };
78
79 BITS addrmode_PVSDST(PVSDST * pPVSDST)
80 {
81 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
82 }
83
84 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
85 {
86 pPVSDST->addrmode0 = addrmode & 1;
87 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
88 }
89
90 void nomask_PVSDST(PVSDST * pPVSDST)
91 {
92 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
93 }
94
95 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
96 {
97 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
98 }
99
100 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
101 {
102 pPVSSRC->addrmode0 = addrmode & 1;
103 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
104 }
105
106
107 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
108 {
109 pPVSSRC->swizzlex =
110 pPVSSRC->swizzley =
111 pPVSSRC->swizzlez =
112 pPVSSRC->swizzlew = swz;
113 }
114
115 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
116 {
117 pPVSSRC->swizzlex = SQ_SEL_X;
118 pPVSSRC->swizzley = SQ_SEL_Y;
119 pPVSSRC->swizzlez = SQ_SEL_Z;
120 pPVSSRC->swizzlew = SQ_SEL_W;
121 }
122
123 void
124 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
125 {
126 switch (x)
127 {
128 case SQ_SEL_X: x = pPVSSRC->swizzlex;
129 break;
130 case SQ_SEL_Y: x = pPVSSRC->swizzley;
131 break;
132 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
133 break;
134 case SQ_SEL_W: x = pPVSSRC->swizzlew;
135 break;
136 default:;
137 }
138
139 switch (y)
140 {
141 case SQ_SEL_X: y = pPVSSRC->swizzlex;
142 break;
143 case SQ_SEL_Y: y = pPVSSRC->swizzley;
144 break;
145 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
146 break;
147 case SQ_SEL_W: y = pPVSSRC->swizzlew;
148 break;
149 default:;
150 }
151
152 switch (z)
153 {
154 case SQ_SEL_X: z = pPVSSRC->swizzlex;
155 break;
156 case SQ_SEL_Y: z = pPVSSRC->swizzley;
157 break;
158 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
159 break;
160 case SQ_SEL_W: z = pPVSSRC->swizzlew;
161 break;
162 default:;
163 }
164
165 switch (w)
166 {
167 case SQ_SEL_X: w = pPVSSRC->swizzlex;
168 break;
169 case SQ_SEL_Y: w = pPVSSRC->swizzley;
170 break;
171 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
172 break;
173 case SQ_SEL_W: w = pPVSSRC->swizzlew;
174 break;
175 default:;
176 }
177
178 pPVSSRC->swizzlex = x;
179 pPVSSRC->swizzley = y;
180 pPVSSRC->swizzlez = z;
181 pPVSSRC->swizzlew = w;
182 }
183
184 void neg_PVSSRC(PVSSRC* pPVSSRC)
185 {
186 pPVSSRC->negx = 1;
187 pPVSSRC->negy = 1;
188 pPVSSRC->negz = 1;
189 pPVSSRC->negw = 1;
190 }
191
192 void noneg_PVSSRC(PVSSRC* pPVSSRC)
193 {
194 pPVSSRC->negx = 0;
195 pPVSSRC->negy = 0;
196 pPVSSRC->negz = 0;
197 pPVSSRC->negw = 0;
198 }
199
200 // negate argument (for SUB instead of ADD and alike)
201 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
202 {
203 pPVSSRC->negx = !pPVSSRC->negx;
204 pPVSSRC->negy = !pPVSSRC->negy;
205 pPVSSRC->negz = !pPVSSRC->negz;
206 pPVSSRC->negw = !pPVSSRC->negw;
207 }
208
209 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
210 {
211 switch (c)
212 {
213 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
214 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
215 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
216 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
217 default:;
218 }
219 }
220
221 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
222 {
223 switch (c)
224 {
225 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
226 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
227 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
228 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
229 default:;
230 }
231 }
232
233 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
234 {
235 return (pOutVTXFmt0->point_size |
236 pOutVTXFmt0->edge_flag |
237 pOutVTXFmt0->rta_index |
238 pOutVTXFmt0->kill_flag |
239 pOutVTXFmt0->viewport_index);
240 }
241
242 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
243 {
244 return (pFPOutFmt->depth |
245 pFPOutFmt->stencil_ref |
246 pFPOutFmt->mask |
247 pFPOutFmt->coverage_to_mask);
248 }
249
250 GLboolean is_reduction_opcode(PVSDWORD* dest)
251 {
252 if (dest->dst.op3 == 0)
253 {
254 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
255 {
256 return GL_TRUE;
257 }
258 }
259 return GL_FALSE;
260 }
261
262 GLboolean EG_is_reduction_opcode(PVSDWORD* dest)
263 {
264 if (dest->dst.op3 == 0)
265 {
266 if ( (dest->dst.opcode == EG_OP2_INST_DOT4 || dest->dst.opcode == EG_OP2_INST_DOT4_IEEE || dest->dst.opcode == EG_OP2_INST_CUBE) )
267 {
268 return GL_TRUE;
269 }
270 }
271 return GL_FALSE;
272 }
273
274 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
275 {
276 GLuint format = FMT_INVALID;
277 GLuint uiElemSize = 0;
278
279 switch (eType)
280 {
281 case GL_BYTE:
282 case GL_UNSIGNED_BYTE:
283 uiElemSize = 1;
284 switch(nChannels)
285 {
286 case 1:
287 format = FMT_8; break;
288 case 2:
289 format = FMT_8_8; break;
290 case 3:
291 /* for some (small/unaligned) strides using 4 comps works
292 * better, probably same as GL_SHORT below
293 * test piglit/draw-vertices */
294 format = FMT_8_8_8_8; break;
295 case 4:
296 format = FMT_8_8_8_8; break;
297 default:
298 break;
299 }
300 break;
301
302 case GL_UNSIGNED_SHORT:
303 case GL_SHORT:
304 uiElemSize = 2;
305 switch(nChannels)
306 {
307 case 1:
308 format = FMT_16; break;
309 case 2:
310 format = FMT_16_16; break;
311 case 3:
312 /* 3 comp GL_SHORT vertex format doesnt work on r700
313 4 somehow works, test - sauerbraten */
314 format = FMT_16_16_16_16; break;
315 case 4:
316 format = FMT_16_16_16_16; break;
317 default:
318 break;
319 }
320 break;
321
322 case GL_UNSIGNED_INT:
323 case GL_INT:
324 uiElemSize = 4;
325 switch(nChannels)
326 {
327 case 1:
328 format = FMT_32; break;
329 case 2:
330 format = FMT_32_32; break;
331 case 3:
332 format = FMT_32_32_32; break;
333 case 4:
334 format = FMT_32_32_32_32; break;
335 default:
336 break;
337 }
338 break;
339
340 case GL_FLOAT:
341 uiElemSize = 4;
342 switch(nChannels)
343 {
344 case 1:
345 format = FMT_32_FLOAT; break;
346 case 2:
347 format = FMT_32_32_FLOAT; break;
348 case 3:
349 format = FMT_32_32_32_FLOAT; break;
350 case 4:
351 format = FMT_32_32_32_32_FLOAT; break;
352 default:
353 break;
354 }
355 break;
356 case GL_DOUBLE:
357 uiElemSize = 8;
358 switch(nChannels)
359 {
360 case 1:
361 format = FMT_32_FLOAT; break;
362 case 2:
363 format = FMT_32_32_FLOAT; break;
364 case 3:
365 format = FMT_32_32_32_FLOAT; break;
366 case 4:
367 format = FMT_32_32_32_32_FLOAT; break;
368 default:
369 break;
370 }
371 break;
372 default:
373 ;
374 //GL_ASSERT_NO_CASE();
375 }
376
377 if(NULL != pClient_size)
378 {
379 *pClient_size = uiElemSize * nChannels;
380 }
381
382 return(format);
383 }
384
385 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
386 {
387 if(nIsOp3 > 0)
388 {
389 return 3;
390 }
391
392 switch (opcode)
393 {
394 case SQ_OP2_INST_ADD:
395 case SQ_OP2_INST_KILLE:
396 case SQ_OP2_INST_KILLGT:
397 case SQ_OP2_INST_KILLGE:
398 case SQ_OP2_INST_KILLNE:
399 case SQ_OP2_INST_MUL:
400 case SQ_OP2_INST_MAX:
401 case SQ_OP2_INST_MIN:
402 //case SQ_OP2_INST_MAX_DX10:
403 //case SQ_OP2_INST_MIN_DX10:
404 case SQ_OP2_INST_SETE:
405 case SQ_OP2_INST_SETNE:
406 case SQ_OP2_INST_SETGT:
407 case SQ_OP2_INST_SETGE:
408 case SQ_OP2_INST_PRED_SETE:
409 case SQ_OP2_INST_PRED_SETGT:
410 case SQ_OP2_INST_PRED_SETGE:
411 case SQ_OP2_INST_PRED_SETNE:
412 case SQ_OP2_INST_DOT4:
413 case SQ_OP2_INST_DOT4_IEEE:
414 case SQ_OP2_INST_CUBE:
415 return 2;
416
417 case SQ_OP2_INST_MOV:
418 case SQ_OP2_INST_MOVA_FLOOR:
419 case SQ_OP2_INST_FRACT:
420 case SQ_OP2_INST_FLOOR:
421 case SQ_OP2_INST_TRUNC:
422 case SQ_OP2_INST_EXP_IEEE:
423 case SQ_OP2_INST_LOG_CLAMPED:
424 case SQ_OP2_INST_LOG_IEEE:
425 case SQ_OP2_INST_RECIP_IEEE:
426 case SQ_OP2_INST_RECIPSQRT_IEEE:
427 case SQ_OP2_INST_FLT_TO_INT:
428 case SQ_OP2_INST_SIN:
429 case SQ_OP2_INST_COS:
430 return 1;
431
432 default: radeon_error(
433 "Need instruction operand number for %x.\n", opcode);
434 };
435
436 return 3;
437 }
438
439 unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3)
440 {
441 if(nIsOp3 > 0)
442 {
443 return 3;
444 }
445
446 switch (opcode)
447 {
448 case EG_OP2_INST_ADD:
449 case EG_OP2_INST_KILLE:
450 case EG_OP2_INST_KILLGT:
451 case EG_OP2_INST_KILLGE:
452 case EG_OP2_INST_KILLNE:
453 case EG_OP2_INST_MUL:
454 case EG_OP2_INST_MAX:
455 case EG_OP2_INST_MIN:
456 //case EG_OP2_INST_MAX_DX10:
457 //case EG_OP2_INST_MIN_DX10:
458 case EG_OP2_INST_SETE:
459 case EG_OP2_INST_SETNE:
460 case EG_OP2_INST_SETGT:
461 case EG_OP2_INST_SETGE:
462 case EG_OP2_INST_PRED_SETE:
463 case EG_OP2_INST_PRED_SETGT:
464 case EG_OP2_INST_PRED_SETGE:
465 case EG_OP2_INST_PRED_SETNE:
466 case EG_OP2_INST_DOT4:
467 case EG_OP2_INST_DOT4_IEEE:
468 case EG_OP2_INST_CUBE:
469 return 2;
470
471 case EG_OP2_INST_MOV:
472 //case SQ_OP2_INST_MOVA_FLOOR:
473 case EG_OP2_INST_FRACT:
474 case EG_OP2_INST_FLOOR:
475 case EG_OP2_INST_TRUNC:
476 case EG_OP2_INST_EXP_IEEE:
477 case EG_OP2_INST_LOG_CLAMPED:
478 case EG_OP2_INST_LOG_IEEE:
479 case EG_OP2_INST_RECIP_IEEE:
480 case EG_OP2_INST_RECIPSQRT_IEEE:
481 case EG_OP2_INST_FLT_TO_INT:
482 case EG_OP2_INST_SIN:
483 case EG_OP2_INST_COS:
484 return 1;
485
486 default: radeon_error(
487 "Need instruction operand number for %x.\n", opcode);
488 };
489
490 return 3;
491 }
492
493 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
494 {
495 GLuint i;
496
497 Init_R700_Shader(pShader);
498 pAsm->pR700Shader = pShader;
499 pAsm->currentShaderType = spt;
500
501 pAsm->cf_last_export_ptr = NULL;
502
503 pAsm->cf_current_export_clause_ptr = NULL;
504 pAsm->cf_current_alu_clause_ptr = NULL;
505 pAsm->cf_current_tex_clause_ptr = NULL;
506 pAsm->cf_current_vtx_clause_ptr = NULL;
507 pAsm->cf_current_cf_clause_ptr = NULL;
508
509 // No clause has been created yet
510 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
511
512 pAsm->number_of_colorandz_exports = 0;
513 pAsm->number_of_exports = 0;
514 pAsm->number_of_export_opcodes = 0;
515
516 pAsm->alu_x_opcode = 0;
517
518 pAsm->D2.bits = 0;
519
520 pAsm->D.bits = 0;
521 pAsm->S[0].bits = 0;
522 pAsm->S[1].bits = 0;
523 pAsm->S[2].bits = 0;
524
525 pAsm->uLastPosUpdate = 0;
526
527 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
528
529 pAsm->uIIns = 0;
530 pAsm->uOIns = 0;
531 pAsm->number_used_registers = 0;
532 pAsm->uUsedConsts = 256;
533
534
535 // Fragment programs
536 pAsm->uBoolConsts = 0;
537 pAsm->uIntConsts = 0;
538 pAsm->uInsts = 0;
539 pAsm->uConsts = 0;
540
541 pAsm->FCSP = 0;
542 pAsm->fc_stack[0].type = FC_NONE;
543
544 pAsm->aArgSubst[0] =
545 pAsm->aArgSubst[1] =
546 pAsm->aArgSubst[2] =
547 pAsm->aArgSubst[3] = (-1);
548
549 pAsm->uOutputs = 0;
550
551 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
552 {
553 pAsm->color_export_register_number[i] = (-1);
554 }
555
556
557 pAsm->depth_export_register_number = (-1);
558 pAsm->stencil_export_register_number = (-1);
559 pAsm->coverage_to_mask_export_register_number = (-1);
560 pAsm->mask_export_register_number = (-1);
561
562 pAsm->starting_export_register_number = 0;
563 pAsm->starting_vfetch_register_number = 0;
564 pAsm->starting_temp_register_number = 0;
565 pAsm->uFirstHelpReg = 0;
566
567 pAsm->input_position_is_used = GL_FALSE;
568 pAsm->input_normal_is_used = GL_FALSE;
569
570 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
571 {
572 pAsm->input_color_is_used[ i ] = GL_FALSE;
573 }
574
575 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
576 {
577 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
578 }
579
580 for (i=0; i<VERT_ATTRIB_MAX; i++)
581 {
582 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
583 }
584
585 pAsm->number_of_inputs = 0;
586
587 pAsm->is_tex = GL_FALSE;
588 pAsm->need_tex_barrier = GL_FALSE;
589
590 pAsm->subs = NULL;
591 pAsm->unSubArraySize = 0;
592 pAsm->unSubArrayPointer = 0;
593 pAsm->callers = NULL;
594 pAsm->unCallerArraySize = 0;
595 pAsm->unCallerArrayPointer = 0;
596
597 pAsm->CALLSP = 0;
598 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
599 pAsm->CALLSTACK[0].plstCFInstructions_local
600 = &(pAsm->pR700Shader->lstCFInstructions);
601
602 pAsm->CALLSTACK[0].max = 0;
603 pAsm->CALLSTACK[0].current = 0;
604
605 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
606
607 pAsm->unCFflags = 0;
608
609 pAsm->presubs = NULL;
610 pAsm->unPresubArraySize = 0;
611 pAsm->unNumPresub = 0;
612 pAsm->unCurNumILInsts = 0;
613
614 pAsm->unVetTexBits = 0;
615
616 return 0;
617 }
618
619 GLboolean IsTex(gl_inst_opcode Opcode)
620 {
621 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
622 (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) || (OPCODE_TXL==Opcode) )
623 {
624 return GL_TRUE;
625 }
626 return GL_FALSE;
627 }
628
629 GLboolean IsAlu(gl_inst_opcode Opcode)
630 {
631 //TODO : more for fc and ex for higher spec.
632 if( IsTex(Opcode) )
633 {
634 return GL_FALSE;
635 }
636 return GL_TRUE;
637 }
638
639 int check_current_clause(r700_AssemblerBase* pAsm,
640 CF_CLAUSE_TYPE new_clause_type)
641 {
642 if (pAsm->cf_current_clause_type != new_clause_type)
643 { //Close last open clause
644 switch (pAsm->cf_current_clause_type)
645 {
646 case CF_ALU_CLAUSE:
647 if ( pAsm->cf_current_alu_clause_ptr != NULL)
648 {
649 pAsm->cf_current_alu_clause_ptr = NULL;
650 }
651 break;
652 case CF_VTX_CLAUSE:
653 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
654 {
655 pAsm->cf_current_vtx_clause_ptr = NULL;
656 }
657 break;
658 case CF_TEX_CLAUSE:
659 if ( pAsm->cf_current_tex_clause_ptr != NULL)
660 {
661 pAsm->cf_current_tex_clause_ptr = NULL;
662 }
663 break;
664 case CF_EXPORT_CLAUSE:
665 if ( pAsm->cf_current_export_clause_ptr != NULL)
666 {
667 pAsm->cf_current_export_clause_ptr = NULL;
668 }
669 break;
670 case CF_OTHER_CLAUSE:
671 if ( pAsm->cf_current_cf_clause_ptr != NULL)
672 {
673 pAsm->cf_current_cf_clause_ptr = NULL;
674 }
675 break;
676 case CF_EMPTY_CLAUSE:
677 break;
678 default:
679 radeon_error(
680 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
681 return GL_FALSE;
682 }
683
684 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
685
686 // Create new clause
687 switch (new_clause_type)
688 {
689 case CF_ALU_CLAUSE:
690 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
691 break;
692 case CF_VTX_CLAUSE:
693 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
694 break;
695 case CF_TEX_CLAUSE:
696 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
697 break;
698 case CF_EXPORT_CLAUSE:
699 {
700 R700ControlFlowSXClause* pR700ControlFlowSXClause
701 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
702
703 // Add new export instruction to control flow program
704 if (pR700ControlFlowSXClause != 0)
705 {
706 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
707 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
708 AddCFInstruction( pAsm->pR700Shader,
709 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
710 }
711 else
712 {
713 radeon_error(
714 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
715 return GL_FALSE;
716 }
717 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
718 }
719 break;
720 case CF_EMPTY_CLAUSE:
721 break;
722 case CF_OTHER_CLAUSE:
723 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
724 break;
725 default:
726 radeon_error(
727 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
728 return GL_FALSE;
729 }
730 }
731
732 return GL_TRUE;
733 }
734
735 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
736 {
737 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
738 {
739 return GL_FALSE;
740 }
741
742 pAsm->cf_current_cf_clause_ptr =
743 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
744
745 if (pAsm->cf_current_cf_clause_ptr != NULL)
746 {
747 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
748 AddCFInstruction( pAsm->pR700Shader,
749 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
750 }
751 else
752 {
753 radeon_error("Could not allocate a new VFetch CF instruction.\n");
754 return GL_FALSE;
755 }
756
757 return GL_TRUE;
758 }
759
760 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
761 R700VertexInstruction* vertex_instruction_ptr)
762 {
763 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
764 {
765 return GL_FALSE;
766 }
767
768 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
769 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
770 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
771 ) )
772 {
773 // Create new Vfetch control flow instruction for this new clause
774 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
775
776 if (pAsm->cf_current_vtx_clause_ptr != NULL)
777 {
778 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
779 AddCFInstruction( pAsm->pR700Shader,
780 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
781 }
782 else
783 {
784 radeon_error("Could not allocate a new VFetch CF instruction.\n");
785 return GL_FALSE;
786 }
787
788 if(8 == pAsm->unAsic)
789 {
790 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, EG_CF_INST_VC,
791 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
792 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
793 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
794 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
795 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
796 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
797 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
798 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
799 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
800 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
801 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
802 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
803 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
804 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
805 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
806 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 1,
807 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
808 }
809 else
810 {
811 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
812 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
813 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
814 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
815 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
816 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
817 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
818 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
819 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
820 }
821
822 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
823 }
824 else
825 {
826 if(8 == pAsm->unAsic)
827 {
828 unsigned int count = GETbits(pAsm->cf_current_vtx_clause_ptr->m_Word1.val,
829 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
830 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
831 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
832 }
833 else
834 {
835 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
836 }
837 }
838
839 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
840
841 return GL_TRUE;
842 }
843
844 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
845 R700TextureInstruction* tex_instruction_ptr)
846 {
847 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
848 {
849 return GL_FALSE;
850 }
851
852 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
853 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
854 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
855 ) )
856 {
857 // new tex cf instruction for this new clause
858 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
859
860 if (pAsm->cf_current_tex_clause_ptr != NULL)
861 {
862 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
863 AddCFInstruction( pAsm->pR700Shader,
864 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
865 }
866 else
867 {
868 radeon_error("Could not allocate a new TEX CF instruction.\n");
869 return GL_FALSE;
870 }
871
872 if(8 == pAsm->unAsic)
873 {
874 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, EG_CF_INST_TC,
875 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
876 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
877 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
878 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
879 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
880 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
881 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
882 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
883 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
884 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
885 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
886 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
887 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
888 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
889 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
890 #ifdef FORCE_CF_TEX_BARRIER
891 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 1,
892 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
893 #else
894 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
895 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
896 #endif
897 }
898 else
899 {
900 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
901 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
902 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
903
904 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
905 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
906 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
907 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
908 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
909 }
910 }
911 else
912 {
913 if(8 == pAsm->unAsic)
914 {
915 unsigned int count = GETbits(pAsm->cf_current_tex_clause_ptr->m_Word1.val,
916 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
917 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
918 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
919 }
920 else
921 {
922 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
923 }
924 }
925
926 // If this clause constains any TEX instruction that is dependent on a
927 // previous instruction, set the barrier bit, also always set for vert
928 // programs as tex deps are not(yet) computed for them
929 if( pAsm->currentShaderType == SPT_VP || pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
930 {
931 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
932 }
933
934 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
935 {
936 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
937 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
938 }
939
940 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
941
942 return GL_TRUE;
943 }
944
945 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
946 GLuint gl_client_id,
947 GLuint destination_register,
948 GLuint number_of_elements,
949 GLenum dataElementType,
950 VTX_FETCH_METHOD* pFetchMethod)
951 {
952 GLuint client_size_inbyte;
953 GLuint data_format;
954 GLuint mega_fetch_count;
955 GLuint is_mega_fetch_flag;
956
957 R700VertexGenericFetch* vfetch_instruction_ptr;
958 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
959
960 if (assembled_vfetch_instruction_ptr == NULL)
961 {
962 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
963 if (vfetch_instruction_ptr == NULL)
964 {
965 return GL_FALSE;
966 }
967 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
968 }
969 else
970 {
971 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
972 }
973
974 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
975
976 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
977 {
978 //TODO : mini fetch
979 mega_fetch_count = 0;
980 is_mega_fetch_flag = 0;
981 }
982 else
983 {
984 mega_fetch_count = MEGA_FETCH_BYTES - 1;
985 is_mega_fetch_flag = 0x1;
986 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
987 }
988
989 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
990 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
991 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
992
993 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
994 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
995 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
996 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
997 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
998
999 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
1000 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1001 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1002 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
1003
1004 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1005
1006 // Destination register
1007 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
1008 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1009
1010 vfetch_instruction_ptr->m_Word2.f.offset = 0;
1011 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1012
1013 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
1014
1015 if (assembled_vfetch_instruction_ptr == NULL)
1016 {
1017 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1018 {
1019 return GL_FALSE;
1020 }
1021
1022 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
1023 {
1024 return GL_FALSE;
1025 }
1026 else
1027 {
1028 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
1029 }
1030 }
1031
1032 return GL_TRUE;
1033 }
1034
1035 GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
1036 GLuint destination_register,
1037 GLenum type,
1038 GLint size,
1039 GLubyte element,
1040 GLuint _signed,
1041 GLboolean normalize,
1042 GLenum format,
1043 VTX_FETCH_METHOD * pFetchMethod)
1044 {
1045 GLuint client_size_inbyte;
1046 GLuint data_format;
1047 GLuint mega_fetch_count;
1048 GLuint is_mega_fetch_flag;
1049
1050 GLuint dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
1051
1052 R700VertexGenericFetch* vfetch_instruction_ptr;
1053 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
1054 = pAsm->vfetch_instruction_ptr_array[element];
1055
1056 if (assembled_vfetch_instruction_ptr == NULL)
1057 {
1058 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1059 if (vfetch_instruction_ptr == NULL)
1060 {
1061 return GL_FALSE;
1062 }
1063 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1064 }
1065 else
1066 {
1067 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1068 }
1069
1070 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
1071
1072 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1073 {
1074 //TODO : mini fetch
1075 mega_fetch_count = 0;
1076 is_mega_fetch_flag = 0;
1077 }
1078 else
1079 {
1080 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1081 is_mega_fetch_flag = 0x1;
1082 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1083 }
1084
1085 SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VC_INST_FETCH,
1086 EG_VTX_WORD0__VC_INST_shift,
1087 EG_VTX_WORD0__VC_INST_mask);
1088 SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VTX_FETCH_VERTEX_DATA,
1089 EG_VTX_WORD0__FETCH_TYPE_shift,
1090 EG_VTX_WORD0__FETCH_TYPE_mask);
1091 CLEARbit(vfetch_instruction_ptr->m_Word0.val,
1092 EG_VTX_WORD0__FWQ_bit);
1093 SETfield(vfetch_instruction_ptr->m_Word0.val, element,
1094 EG_VTX_WORD0__BUFFER_ID_shift,
1095 EG_VTX_WORD0__BUFFER_ID_mask);
1096 SETfield(vfetch_instruction_ptr->m_Word0.val, 0x0,
1097 EG_VTX_WORD0__SRC_GPR_shift,
1098 EG_VTX_WORD0__SRC_GPR_mask);
1099 SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
1100 EG_VTX_WORD0__SRC_REL_shift,
1101 EG_VTX_WORD0__SRC_REL_bit);
1102 SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_SEL_X,
1103 EG_VTX_WORD0__SRC_SEL_X_shift,
1104 EG_VTX_WORD0__SRC_SEL_X_mask);
1105 SETfield(vfetch_instruction_ptr->m_Word0.val, mega_fetch_count,
1106 EG_VTX_WORD0__MFC_shift,
1107 EG_VTX_WORD0__MFC_mask);
1108
1109 if(format == GL_BGRA)
1110 {
1111 dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1112 dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1113 dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1114 dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1115 }
1116 else
1117 {
1118 dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1119 dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1120 dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1121 dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1122
1123 }
1124 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_x,
1125 EG_VTX_WORD1__DST_SEL_X_shift,
1126 EG_VTX_WORD1__DST_SEL_X_mask);
1127 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_y,
1128 EG_VTX_WORD1__DST_SEL_Y_shift,
1129 EG_VTX_WORD1__DST_SEL_Y_mask);
1130 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_z,
1131 EG_VTX_WORD1__DST_SEL_Z_shift,
1132 EG_VTX_WORD1__DST_SEL_Z_mask);
1133 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_w,
1134 EG_VTX_WORD1__DST_SEL_W_shift,
1135 EG_VTX_WORD1__DST_SEL_W_mask);
1136
1137 SETfield(vfetch_instruction_ptr->m_Word1.val, 0, /* use format here, in r6/r7, format used set in const, need to use same */
1138 EG_VTX_WORD1__UCF_shift,
1139 EG_VTX_WORD1__UCF_bit);
1140 SETfield(vfetch_instruction_ptr->m_Word1.val, data_format,
1141 EG_VTX_WORD1__DATA_FORMAT_shift,
1142 EG_VTX_WORD1__DATA_FORMAT_mask);
1143 #ifdef TEST_VFETCH
1144 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1145 EG_VTX_WORD1__FCA_shift,
1146 EG_VTX_WORD1__FCA_bit);
1147 #else
1148 if(1 == _signed)
1149 {
1150 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1151 EG_VTX_WORD1__FCA_shift,
1152 EG_VTX_WORD1__FCA_bit);
1153 }
1154 else
1155 {
1156 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_UNSIGNED,
1157 EG_VTX_WORD1__FCA_shift,
1158 EG_VTX_WORD1__FCA_bit);
1159 }
1160 #endif /* TEST_VFETCH */
1161
1162 if(GL_TRUE == normalize)
1163 {
1164 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_NORM,
1165 EG_VTX_WORD1__NFA_shift,
1166 EG_VTX_WORD1__NFA_mask);
1167 }
1168 else
1169 {
1170 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_SCALED,
1171 EG_VTX_WORD1__NFA_shift,
1172 EG_VTX_WORD1__NFA_mask);
1173 }
1174
1175 /* Destination register */
1176 SETfield(vfetch_instruction_ptr->m_Word1.val, destination_register,
1177 EG_VTX_WORD1_GPR__DST_GPR_shift,
1178 EG_VTX_WORD1_GPR__DST_GPR_mask);
1179 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_ABSOLUTE,
1180 EG_VTX_WORD1_GPR__DST_REL_shift,
1181 EG_VTX_WORD1_GPR__DST_REL_bit);
1182
1183
1184 SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1185 EG_VTX_WORD2__OFFSET_shift,
1186 EG_VTX_WORD2__OFFSET_mask);
1187 SETfield(vfetch_instruction_ptr->m_Word2.val, SQ_ENDIAN_NONE,
1188 EG_VTX_WORD2__ENDIAN_SWAP_shift,
1189 EG_VTX_WORD2__ENDIAN_SWAP_mask);
1190 SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1191 EG_VTX_WORD2__CBNS_shift,
1192 EG_VTX_WORD2__CBNS_bit);
1193 SETfield(vfetch_instruction_ptr->m_Word2.val, is_mega_fetch_flag,
1194 EG_VTX_WORD2__MEGA_FETCH_shift,
1195 EG_VTX_WORD2__MEGA_FETCH_mask);
1196
1197 if (assembled_vfetch_instruction_ptr == NULL)
1198 {
1199 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1200 {
1201 return GL_FALSE;
1202 }
1203
1204 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1205 {
1206 return GL_FALSE;
1207 }
1208 else
1209 {
1210 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1211 }
1212 }
1213
1214 return GL_TRUE;
1215 }
1216
1217 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
1218 GLuint destination_register,
1219 GLenum type,
1220 GLint size,
1221 GLubyte element,
1222 GLuint _signed,
1223 GLboolean normalize,
1224 GLenum format,
1225 VTX_FETCH_METHOD * pFetchMethod)
1226 {
1227 GLuint client_size_inbyte;
1228 GLuint data_format;
1229 GLuint mega_fetch_count;
1230 GLuint is_mega_fetch_flag;
1231
1232 R700VertexGenericFetch* vfetch_instruction_ptr;
1233 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
1234 = pAsm->vfetch_instruction_ptr_array[element];
1235
1236 if (assembled_vfetch_instruction_ptr == NULL)
1237 {
1238 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1239 if (vfetch_instruction_ptr == NULL)
1240 {
1241 return GL_FALSE;
1242 }
1243 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1244 }
1245 else
1246 {
1247 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1248 }
1249
1250 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
1251
1252 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1253 {
1254 //TODO : mini fetch
1255 mega_fetch_count = 0;
1256 is_mega_fetch_flag = 0;
1257 }
1258 else
1259 {
1260 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1261 is_mega_fetch_flag = 0x1;
1262 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1263 }
1264
1265 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
1266 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
1267 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1268
1269 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
1270 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
1271 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1272 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
1273 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
1274
1275 if(format == GL_BGRA)
1276 {
1277 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1278 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1279 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1280 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1281 }
1282 else
1283 {
1284 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1285 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1286 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1287 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1288
1289 }
1290
1291 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1292 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
1293 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
1294
1295 if(1 == _signed)
1296 {
1297 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
1298 }
1299 else
1300 {
1301 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
1302 }
1303
1304 if(GL_TRUE == normalize)
1305 {
1306 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
1307 }
1308 else
1309 {
1310 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
1311 }
1312
1313 // Destination register
1314 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
1315 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1316
1317 vfetch_instruction_ptr->m_Word2.f.offset = 0;
1318 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1319
1320 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
1321
1322 if (assembled_vfetch_instruction_ptr == NULL)
1323 {
1324 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1325 {
1326 return GL_FALSE;
1327 }
1328
1329 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1330 {
1331 return GL_FALSE;
1332 }
1333 else
1334 {
1335 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1336 }
1337 }
1338
1339 return GL_TRUE;
1340 }
1341
1342 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
1343 {
1344 GLint i;
1345 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
1346 pAsm->cf_current_vtx_clause_ptr = NULL;
1347
1348 for (i=0; i<VERT_ATTRIB_MAX; i++)
1349 {
1350 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1351 }
1352
1353 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
1354
1355 return GL_TRUE;
1356 }
1357
1358 GLuint gethelpr(r700_AssemblerBase* pAsm)
1359 {
1360 GLuint r = pAsm->uHelpReg;
1361 pAsm->uHelpReg++;
1362 if (pAsm->uHelpReg > pAsm->number_used_registers)
1363 {
1364 pAsm->number_used_registers = pAsm->uHelpReg;
1365 }
1366 return r;
1367 }
1368 void resethelpr(r700_AssemblerBase* pAsm)
1369 {
1370 pAsm->uHelpReg = pAsm->uFirstHelpReg;
1371 }
1372
1373 void checkop_init(r700_AssemblerBase* pAsm)
1374 {
1375 resethelpr(pAsm);
1376 pAsm->aArgSubst[0] =
1377 pAsm->aArgSubst[1] =
1378 pAsm->aArgSubst[2] =
1379 pAsm->aArgSubst[3] = -1;
1380 }
1381
1382 static GLboolean next_ins(r700_AssemblerBase *pAsm)
1383 {
1384 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1385
1386 if (GL_TRUE == pAsm->is_tex)
1387 {
1388 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX)
1389 {
1390 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE))
1391 {
1392 radeon_error("Error assembling TEX instruction\n");
1393 return GL_FALSE;
1394 }
1395 }
1396 else
1397 {
1398 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE))
1399 {
1400 radeon_error("Error assembling TEX instruction\n");
1401 return GL_FALSE;
1402 }
1403 }
1404 }
1405 else
1406 { //ALU
1407 if (GL_FALSE == assemble_alu_instruction(pAsm))
1408 {
1409 radeon_error("Error assembling ALU instruction\n");
1410 return GL_FALSE;
1411 }
1412 }
1413
1414 if (pAsm->D.dst.rtype == DST_REG_OUT)
1415 {
1416 assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number);
1417 }
1418
1419 //reset for next inst.
1420 pAsm->D.bits = 0;
1421 pAsm->D2.bits = 0;
1422 pAsm->S[0].bits = 0;
1423 pAsm->S[1].bits = 0;
1424 pAsm->S[2].bits = 0;
1425 pAsm->is_tex = GL_FALSE;
1426 pAsm->need_tex_barrier = GL_FALSE;
1427 pAsm->D2.bits = 0;
1428 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
1429 return GL_TRUE;
1430 }
1431
1432 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1433 {
1434 GLuint tmp = gethelpr(pAsm);
1435
1436 //mov src to temp helper gpr.
1437 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1438
1439 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1440
1441 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1442 pAsm->D.dst.reg = tmp;
1443
1444 nomask_PVSDST(&(pAsm->D.dst));
1445
1446 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1447 {
1448 return GL_FALSE;
1449 }
1450
1451 noswizzle_PVSSRC(&(pAsm->S[0].src));
1452 noneg_PVSSRC(&(pAsm->S[0].src));
1453
1454 if( GL_FALSE == next_ins(pAsm) )
1455 {
1456 return GL_FALSE;
1457 }
1458
1459 pAsm->aArgSubst[1 + src] = tmp;
1460
1461 return GL_TRUE;
1462 }
1463
1464 GLboolean checkop1(r700_AssemblerBase* pAsm)
1465 {
1466 checkop_init(pAsm);
1467 return GL_TRUE;
1468 }
1469
1470 GLboolean checkop2(r700_AssemblerBase* pAsm)
1471 {
1472 GLboolean bSrcConst[2];
1473 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1474
1475 checkop_init(pAsm);
1476
1477 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1478 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1479 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1480 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1481 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1482 {
1483 bSrcConst[0] = GL_TRUE;
1484 }
1485 else
1486 {
1487 bSrcConst[0] = GL_FALSE;
1488 }
1489 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1490 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1491 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1492 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1493 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1494 {
1495 bSrcConst[1] = GL_TRUE;
1496 }
1497 else
1498 {
1499 bSrcConst[1] = GL_FALSE;
1500 }
1501
1502 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1503 {
1504 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1505 {
1506 if( GL_FALSE == mov_temp(pAsm, 1) )
1507 {
1508 return GL_FALSE;
1509 }
1510 }
1511 }
1512
1513 return GL_TRUE;
1514 }
1515
1516 GLboolean checkop3(r700_AssemblerBase* pAsm)
1517 {
1518 GLboolean bSrcConst[3];
1519 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1520
1521 checkop_init(pAsm);
1522
1523 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1524 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1525 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1526 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1527 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1528 {
1529 bSrcConst[0] = GL_TRUE;
1530 }
1531 else
1532 {
1533 bSrcConst[0] = GL_FALSE;
1534 }
1535 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1536 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1537 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1538 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1539 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1540 {
1541 bSrcConst[1] = GL_TRUE;
1542 }
1543 else
1544 {
1545 bSrcConst[1] = GL_FALSE;
1546 }
1547 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1548 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1549 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1550 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1551 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1552 {
1553 bSrcConst[2] = GL_TRUE;
1554 }
1555 else
1556 {
1557 bSrcConst[2] = GL_FALSE;
1558 }
1559
1560 if( (GL_TRUE == bSrcConst[0]) &&
1561 (GL_TRUE == bSrcConst[1]) &&
1562 (GL_TRUE == bSrcConst[2]) )
1563 {
1564 if( GL_FALSE == mov_temp(pAsm, 1) )
1565 {
1566 return GL_FALSE;
1567 }
1568 if( GL_FALSE == mov_temp(pAsm, 2) )
1569 {
1570 return GL_FALSE;
1571 }
1572
1573 return GL_TRUE;
1574 }
1575 else if( (GL_TRUE == bSrcConst[0]) &&
1576 (GL_TRUE == bSrcConst[1]) )
1577 {
1578 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1579 {
1580 if( GL_FALSE == mov_temp(pAsm, 1) )
1581 {
1582 return GL_FALSE;
1583 }
1584 }
1585
1586 return GL_TRUE;
1587 }
1588 else if ( (GL_TRUE == bSrcConst[0]) &&
1589 (GL_TRUE == bSrcConst[2]) )
1590 {
1591 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1592 {
1593 if( GL_FALSE == mov_temp(pAsm, 2) )
1594 {
1595 return GL_FALSE;
1596 }
1597 }
1598
1599 return GL_TRUE;
1600 }
1601 else if( (GL_TRUE == bSrcConst[1]) &&
1602 (GL_TRUE == bSrcConst[2]) )
1603 {
1604 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1605 {
1606 if( GL_FALSE == mov_temp(pAsm, 2) )
1607 {
1608 return GL_FALSE;
1609 }
1610 }
1611
1612 return GL_TRUE;
1613 }
1614
1615 return GL_TRUE;
1616 }
1617
1618 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1619 int src,
1620 int fld)
1621 {
1622 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1623
1624 if (fld == -1)
1625 {
1626 fld = src;
1627 }
1628
1629 if(pAsm->aArgSubst[1+src] >= 0)
1630 {
1631 assert(fld >= 0);
1632 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1633 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1634 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1635 }
1636 else
1637 {
1638 if (1 == pILInst->SrcReg[src].RelAddr)
1639 {
1640 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1641 }
1642 else
1643 {
1644 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1645 }
1646 switch (pILInst->SrcReg[src].File)
1647 {
1648 case PROGRAM_TEMPORARY:
1649 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1650 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1651 break;
1652 case PROGRAM_CONSTANT:
1653 case PROGRAM_LOCAL_PARAM:
1654 case PROGRAM_ENV_PARAM:
1655 case PROGRAM_STATE_VAR:
1656 case PROGRAM_UNIFORM:
1657 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1658 if(pILInst->SrcReg[src].Index < 0)
1659 {
1660 WARN_ONCE("Negative register offsets not supported yet!\n");
1661 pAsm->S[fld].src.reg = 0;
1662 }
1663 else
1664 {
1665 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1666 }
1667 break;
1668 case PROGRAM_INPUT:
1669 pAsm->S[fld].src.rtype = SRC_REG_GPR;
1670 switch (pAsm->currentShaderType)
1671 {
1672 case SPT_FP:
1673 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1674 break;
1675 case SPT_VP:
1676 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1677 break;
1678 }
1679 break;
1680 case PROGRAM_OUTPUT:
1681 pAsm->S[fld].src.rtype = SRC_REG_GPR;
1682 switch (pAsm->currentShaderType)
1683 {
1684 case SPT_FP:
1685 pAsm->S[fld].src.reg = pAsm->uiFP_OutputMap[pILInst->SrcReg[src].Index];
1686 break;
1687 case SPT_VP:
1688 pAsm->S[fld].src.reg = pAsm->ucVP_OutputMap[pILInst->SrcReg[src].Index];
1689 break;
1690 }
1691 break;
1692 default:
1693 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1694 return GL_FALSE;
1695 }
1696 }
1697
1698 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1699 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1700 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1701 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1702
1703 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1704 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1705 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1706 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1707
1708 return GL_TRUE;
1709 }
1710
1711 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1712 {
1713 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1714 switch (pILInst->DstReg.File)
1715 {
1716 case PROGRAM_TEMPORARY:
1717 if (1 == pILInst->DstReg.RelAddr)
1718 {
1719 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1720 }
1721 else
1722 {
1723 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1724 }
1725 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1726 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1727 break;
1728 case PROGRAM_ADDRESS:
1729 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1730 pAsm->D.dst.rtype = DST_REG_A0;
1731 pAsm->D.dst.reg = 0;
1732 break;
1733 case PROGRAM_OUTPUT:
1734 if (1 == pILInst->DstReg.RelAddr)
1735 {
1736 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1737 }
1738 else
1739 {
1740 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1741 }
1742 pAsm->D.dst.rtype = DST_REG_OUT;
1743 switch (pAsm->currentShaderType)
1744 {
1745 case SPT_FP:
1746 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1747 break;
1748 case SPT_VP:
1749 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1750 break;
1751 }
1752 break;
1753 default:
1754 radeon_error("Invalid destination output argument type\n");
1755 return GL_FALSE;
1756 }
1757
1758 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1759 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1760 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1761 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1762
1763 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1764 {
1765 pAsm->D2.dst2.SaturateMode = 1;
1766 }
1767 else
1768 {
1769 pAsm->D2.dst2.SaturateMode = 0;
1770 }
1771
1772 return GL_TRUE;
1773 }
1774
1775 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1776 {
1777 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1778
1779 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1780 {
1781 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1782 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1783
1784 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1785 }
1786 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1787 {
1788 pAsm->D.dst.rtype = DST_REG_OUT;
1789 switch (pAsm->currentShaderType)
1790 {
1791 case SPT_FP:
1792 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1793 break;
1794 case SPT_VP:
1795 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1796 break;
1797 }
1798
1799 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1800 }
1801 else
1802 {
1803 radeon_error("Invalid destination output argument type\n");
1804 return GL_FALSE;
1805 }
1806
1807 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1808 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1809 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1810 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1811
1812 return GL_TRUE;
1813 }
1814
1815 GLboolean tex_src(r700_AssemblerBase *pAsm)
1816 {
1817 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1818
1819 GLboolean bValidTexCoord = GL_FALSE;
1820
1821 if(pAsm->aArgSubst[1] >= 0)
1822 {
1823 bValidTexCoord = GL_TRUE;
1824 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1825 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1826 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1827 }
1828 else
1829 {
1830 switch (pILInst->SrcReg[0].File) {
1831 case PROGRAM_UNIFORM:
1832 case PROGRAM_CONSTANT:
1833 case PROGRAM_LOCAL_PARAM:
1834 case PROGRAM_ENV_PARAM:
1835 case PROGRAM_STATE_VAR:
1836 break;
1837 case PROGRAM_TEMPORARY:
1838 bValidTexCoord = GL_TRUE;
1839 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1840 pAsm->starting_temp_register_number;
1841 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1842 break;
1843 case PROGRAM_INPUT:
1844 if(SPT_VP == pAsm->currentShaderType)
1845 {
1846 switch (pILInst->SrcReg[0].Index)
1847 {
1848 case VERT_ATTRIB_TEX0:
1849 case VERT_ATTRIB_TEX1:
1850 case VERT_ATTRIB_TEX2:
1851 case VERT_ATTRIB_TEX3:
1852 case VERT_ATTRIB_TEX4:
1853 case VERT_ATTRIB_TEX5:
1854 case VERT_ATTRIB_TEX6:
1855 case VERT_ATTRIB_TEX7:
1856 bValidTexCoord = GL_TRUE;
1857 pAsm->S[0].src.reg =
1858 pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1859 pAsm->S[0].src.rtype = SRC_REG_GPR;
1860 break;
1861 }
1862 }
1863 else
1864 {
1865 switch (pILInst->SrcReg[0].Index)
1866 {
1867 case FRAG_ATTRIB_WPOS:
1868 case FRAG_ATTRIB_COL0:
1869 case FRAG_ATTRIB_COL1:
1870 case FRAG_ATTRIB_FOGC:
1871 case FRAG_ATTRIB_TEX0:
1872 case FRAG_ATTRIB_TEX1:
1873 case FRAG_ATTRIB_TEX2:
1874 case FRAG_ATTRIB_TEX3:
1875 case FRAG_ATTRIB_TEX4:
1876 case FRAG_ATTRIB_TEX5:
1877 case FRAG_ATTRIB_TEX6:
1878 case FRAG_ATTRIB_TEX7:
1879 bValidTexCoord = GL_TRUE;
1880 pAsm->S[0].src.reg =
1881 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1882 pAsm->S[0].src.rtype = SRC_REG_GPR;
1883 break;
1884 case FRAG_ATTRIB_FACE:
1885 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1886 break;
1887 case FRAG_ATTRIB_PNTC:
1888 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1889 break;
1890 }
1891
1892 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1893 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1894 {
1895 bValidTexCoord = GL_TRUE;
1896 pAsm->S[0].src.reg =
1897 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1898 pAsm->S[0].src.rtype = SRC_REG_GPR;
1899 }
1900 }
1901
1902 break;
1903 }
1904 }
1905
1906 if(GL_TRUE == bValidTexCoord)
1907 {
1908 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1909 }
1910 else
1911 {
1912 radeon_error("Invalid source texcoord for TEX instruction\n");
1913 return GL_FALSE;
1914 }
1915
1916 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1917 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1918 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1919 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1920
1921 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1922 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1923 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1924 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1925
1926 return GL_TRUE;
1927 }
1928
1929 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1930 {
1931 PVSSRC * texture_coordinate_source;
1932 PVSSRC * texture_unit_source;
1933
1934 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1935 if (tex_instruction_ptr == NULL)
1936 {
1937 return GL_FALSE;
1938 }
1939 Init_R700TextureInstruction(tex_instruction_ptr);
1940
1941 texture_coordinate_source = &(pAsm->S[0].src);
1942 texture_unit_source = &(pAsm->S[1].src);
1943
1944 if(8 == pAsm->unAsic) /* evergreen */
1945 {
1946
1947 SETfield(tex_instruction_ptr->m_Word0.val, pAsm->D.dst.opcode,
1948 EG_TEX_WORD0__TEX_INST_shift,
1949 EG_TEX_WORD0__TEX_INST_mask);
1950
1951 if( (SQ_TEX_INST_GET_GRADIENTS_H == pAsm->D.dst.opcode)
1952 ||(SQ_TEX_INST_GET_GRADIENTS_V == pAsm->D.dst.opcode) )
1953 {
1954 /* Use fine texel derivative calculation rather than use quad derivative */
1955 SETfield(tex_instruction_ptr->m_Word0.val, 1,
1956 EG_TEX_WORD0__INST_MOD_shift,
1957 EG_TEX_WORD0__INST_MOD_mask);
1958 }
1959 else
1960 {
1961 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1962 EG_TEX_WORD0__INST_MOD_shift,
1963 EG_TEX_WORD0__INST_MOD_mask);
1964 }
1965
1966 CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__FWQ_bit);
1967
1968 if(SPT_VP == pAsm->currentShaderType)
1969 {
1970 SETfield(tex_instruction_ptr->m_Word0.val, (texture_unit_source->reg + VERT_ATTRIB_MAX),
1971 EG_TEX_WORD0__RESOURCE_ID_shift,
1972 EG_TEX_WORD0__RESOURCE_ID_mask);
1973 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1974 }
1975 else
1976 {
1977 SETfield(tex_instruction_ptr->m_Word0.val, texture_unit_source->reg,
1978 EG_TEX_WORD0__RESOURCE_ID_shift,
1979 EG_TEX_WORD0__RESOURCE_ID_mask);
1980 }
1981
1982 CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__ALT_CONST_bit);
1983 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1984 EG_TEX_WORD0__RIM_shift,
1985 EG_TEX_WORD0__RIM_mask);
1986 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1987 EG_TEX_WORD0__SIM_shift,
1988 EG_TEX_WORD0__SIM_mask);
1989 }
1990 else
1991 {
1992 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1993 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1994 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1995 tex_instruction_ptr->m_Word0.f.alt_const = 0;
1996
1997 if(SPT_VP == pAsm->currentShaderType)
1998 {
1999 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
2000 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
2001 }
2002 else
2003 {
2004 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
2005 }
2006 }
2007
2008 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
2009 if (normalized) {
2010 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
2011 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
2012 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
2013 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
2014 } else {
2015 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
2016 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
2017 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
2018 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
2019 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
2020 }
2021
2022 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
2023 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
2024 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
2025 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
2026
2027 // dst
2028 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2029 (pAsm->D.dst.rtype == DST_REG_OUT) )
2030 {
2031 if(8 == pAsm->unAsic) /* evergreen */
2032 {
2033 SETfield(tex_instruction_ptr->m_Word0.val, texture_coordinate_source->reg,
2034 EG_TEX_WORD0__SRC_GPR_shift,
2035 EG_TEX_WORD0__SRC_GPR_mask);
2036 SETfield(tex_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
2037 EG_TEX_WORD0__SRC_REL_shift,
2038 EG_TEX_WORD0__SRC_REL_bit);
2039 }
2040 else
2041 {
2042 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
2043 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
2044 }
2045
2046 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2047 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
2048
2049 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
2050 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
2051 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
2052 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
2053
2054
2055 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
2056 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
2057 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
2058 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
2059 }
2060 else
2061 {
2062 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
2063 return GL_FALSE;
2064 }
2065
2066 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
2067 {
2068 return GL_FALSE;
2069 }
2070
2071 return GL_TRUE;
2072 }
2073
2074 void initialize(r700_AssemblerBase *pAsm)
2075 {
2076 GLuint cycle, component;
2077
2078 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
2079 {
2080 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
2081 {
2082 pAsm->hw_gpr[cycle][component] = (-1);
2083 }
2084 }
2085 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
2086 {
2087 pAsm->hw_cfile_addr[component] = (-1);
2088 pAsm->hw_cfile_chan[component] = (-1);
2089 }
2090 }
2091
2092 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
2093 int source_index,
2094 PVSSRC* pSource,
2095 BITS scalar_channel_index,
2096 r700_AssemblerBase *pAsm)
2097 {
2098 BITS src_sel;
2099 BITS src_rel;
2100 BITS src_chan;
2101 BITS src_neg;
2102
2103 //--------------------------------------------------------------------------
2104 // Source for operands src0, src1.
2105 // Values [0,127] correspond to GPR[0..127].
2106 // Values [256,511] correspond to cfile constants c[0..255].
2107
2108 //--------------------------------------------------------------------------
2109 // Other special values are shown in the list below.
2110
2111 // 248 SQ_ALU_SRC_0: special constant 0.0.
2112 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
2113
2114 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
2115 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
2116
2117 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
2118 // 253 SQ_ALU_SRC_LITERAL: literal constant.
2119
2120 // 254 SQ_ALU_SRC_PV: previous vector result.
2121 // 255 SQ_ALU_SRC_PS: previous scalar result.
2122 //--------------------------------------------------------------------------
2123
2124 BITS channel_swizzle;
2125 switch (scalar_channel_index)
2126 {
2127 case 0: channel_swizzle = pSource->swizzlex; break;
2128 case 1: channel_swizzle = pSource->swizzley; break;
2129 case 2: channel_swizzle = pSource->swizzlez; break;
2130 case 3: channel_swizzle = pSource->swizzlew; break;
2131 default: channel_swizzle = SQ_SEL_MASK; break;
2132 }
2133
2134 if(channel_swizzle == SQ_SEL_0)
2135 {
2136 src_sel = SQ_ALU_SRC_0;
2137 }
2138 else if (channel_swizzle == SQ_SEL_1)
2139 {
2140 src_sel = SQ_ALU_SRC_1;
2141 }
2142 else
2143 {
2144 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
2145 (pSource->rtype == SRC_REG_GPR)
2146 )
2147 {
2148 src_sel = pSource->reg;
2149 }
2150 else if (pSource->rtype == SRC_REG_CONSTANT)
2151 {
2152 /* TODO : 4 const buffers */
2153 if(GL_TRUE == pAsm->bUseMemConstant)
2154 {
2155 src_sel = pSource->reg + SQ_ALU_SRC_KCACHE0_BASE;
2156 pAsm->kcacheUsed = SQ_ALU_SRC_KCACHE0_BASE;
2157 }
2158 else
2159 {
2160 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
2161 }
2162 }
2163 else if (pSource->rtype == SRC_REC_LITERAL)
2164 {
2165 src_sel = SQ_ALU_SRC_LITERAL;
2166 }
2167 else
2168 {
2169 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
2170 source_index, pSource->rtype);
2171 return GL_FALSE;
2172 }
2173 }
2174
2175 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
2176 {
2177 src_rel = SQ_ABSOLUTE;
2178 }
2179 else
2180 {
2181 src_rel = SQ_RELATIVE;
2182 }
2183
2184 switch (channel_swizzle)
2185 {
2186 case SQ_SEL_X:
2187 src_chan = SQ_CHAN_X;
2188 break;
2189 case SQ_SEL_Y:
2190 src_chan = SQ_CHAN_Y;
2191 break;
2192 case SQ_SEL_Z:
2193 src_chan = SQ_CHAN_Z;
2194 break;
2195 case SQ_SEL_W:
2196 src_chan = SQ_CHAN_W;
2197 break;
2198 case SQ_SEL_0:
2199 case SQ_SEL_1:
2200 // Does not matter since src_sel controls
2201 src_chan = SQ_CHAN_X;
2202 break;
2203 default:
2204 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
2205 return GL_FALSE;
2206 break;
2207 }
2208
2209 switch (scalar_channel_index)
2210 {
2211 case 0: src_neg = pSource->negx; break;
2212 case 1: src_neg = pSource->negy; break;
2213 case 2: src_neg = pSource->negz; break;
2214 case 3: src_neg = pSource->negw; break;
2215 default: src_neg = 0; break;
2216 }
2217
2218 switch (source_index)
2219 {
2220 case 0:
2221 assert(alu_instruction_ptr);
2222 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
2223 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
2224 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
2225 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
2226 break;
2227 case 1:
2228 assert(alu_instruction_ptr);
2229 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
2230 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
2231 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
2232 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
2233 break;
2234 case 2:
2235 assert(alu_instruction_ptr);
2236 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
2237 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
2238 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
2239 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
2240 break;
2241 default:
2242 radeon_error("Only three sources allowed in ALU opcodes.\n");
2243 return GL_FALSE;
2244 break;
2245 }
2246
2247 return GL_TRUE;
2248 }
2249
2250 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
2251 R700ALUInstruction* alu_instruction_ptr,
2252 GLuint contiguous_slots_needed)
2253 {
2254 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
2255 {
2256 return GL_FALSE;
2257 }
2258
2259 if ( pAsm->alu_x_opcode != 0 ||
2260 pAsm->cf_current_alu_clause_ptr == NULL ||
2261 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
2262 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
2263 ) )
2264 {
2265
2266 //new cf inst for this clause
2267 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
2268
2269 // link the new cf to cf segment
2270 if(NULL != pAsm->cf_current_alu_clause_ptr)
2271 {
2272 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
2273 AddCFInstruction( pAsm->pR700Shader,
2274 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
2275 }
2276 else
2277 {
2278 radeon_error("Could not allocate a new ALU CF instruction.\n");
2279 return GL_FALSE;
2280 }
2281
2282 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2283 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2284 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
2285
2286 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2287 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2288 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2289
2290 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
2291
2292 if(pAsm->alu_x_opcode != 0)
2293 {
2294 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
2295 pAsm->alu_x_opcode = 0;
2296 }
2297 else
2298 {
2299 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
2300 }
2301
2302 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
2303
2304 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
2305 }
2306 else
2307 {
2308 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
2309 }
2310
2311 /* TODO : handle 4 bufs */
2312 if( (pAsm->kcacheUsed > 0) && (GL_TRUE == pAsm->bUseMemConstant) )
2313 {
2314 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2315 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2316 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_LOCK_2;
2317 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2318 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2319 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2320 }
2321
2322 // If this clause constains any instruction that is forward dependent on a TEX instruction,
2323 // set the whole_quad_mode for this clause
2324 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
2325 {
2326 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
2327 }
2328
2329 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
2330 {
2331 alu_instruction_ptr->m_Word0.f.last = 1;
2332 }
2333
2334 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
2335 {
2336 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
2337 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
2338 }
2339
2340 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
2341
2342 return GL_TRUE;
2343 }
2344
2345 GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm)
2346 {
2347 R700ALUInstruction * alu_instruction_ptr = NULL;
2348
2349 int ui;
2350 unsigned int uj;
2351 unsigned int unWord0Temp = 0x380C00;
2352 unsigned int unWord1Temp = 0x146B10; //SQ_SEL_X
2353
2354 if(pAsm->uIIns > 0)
2355 {
2356 for(ui=(pAsm->uIIns-1); ui>=0; ui--)
2357 {
2358 for(uj=0; uj<8; uj++)
2359 {
2360 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2361 Init_R700ALUInstruction(alu_instruction_ptr);
2362 alu_instruction_ptr->m_Word0.val = unWord0Temp;
2363 alu_instruction_ptr->m_Word1.val = unWord1Temp;
2364
2365 if(uj < 4)
2366 {
2367 SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_ZW,
2368 EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2369 }
2370 else
2371 {
2372 SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_XY,
2373 EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2374 }
2375 if( (uj > 1) && (uj < 6) )
2376 {
2377 SETfield(alu_instruction_ptr->m_Word1.val, 1,
2378 EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2379 }
2380 else
2381 {
2382 SETfield(alu_instruction_ptr->m_Word1.val, 0,
2383 EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2384 }
2385 if( (uj > 1) && (uj < 6) )
2386 {
2387 SETfield(alu_instruction_ptr->m_Word1.val, ui,
2388 EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2389 }
2390 else
2391 {
2392 SETfield(alu_instruction_ptr->m_Word1.val, 111,
2393 EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2394 }
2395
2396 SETfield(alu_instruction_ptr->m_Word1.val, (uj % 4),
2397 EG_ALU_WORD1__DST_CHAN_shift, EG_ALU_WORD1__DST_CHAN_mask);
2398 SETfield(alu_instruction_ptr->m_Word0.val, (1 - (uj % 2)),
2399 EG_ALU_WORD0__SRC0_CHAN_shift, EG_ALU_WORD0__SRC0_CHAN_mask);
2400 SETfield(alu_instruction_ptr->m_Word0.val, (EG_ALU_SRC_PARAM_BASE + ui),
2401 EG_ALU_WORD0__SRC1_SEL_shift, EG_ALU_WORD0__SRC1_SEL_mask);
2402 if(3 == (uj % 4))
2403 {
2404 SETfield(alu_instruction_ptr->m_Word0.val, 1,
2405 EG_ALU_WORD0__LAST_shift, EG_ALU_WORD0__LAST_bit);
2406 }
2407
2408 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, 4) )
2409 {
2410 return GL_FALSE;
2411 }
2412 }
2413 }
2414 }
2415
2416 return GL_TRUE;
2417 }
2418
2419 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
2420 int source_index,
2421 BITS* psrc_sel,
2422 BITS* psrc_rel,
2423 BITS* psrc_chan,
2424 BITS* psrc_neg)
2425 {
2426 switch (source_index)
2427 {
2428 case 0:
2429 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
2430 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
2431 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
2432 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
2433 break;
2434
2435 case 1:
2436 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
2437 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
2438 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
2439 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
2440 break;
2441
2442 case 2:
2443 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
2444 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
2445 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
2446 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
2447 break;
2448 }
2449 }
2450
2451 int is_cfile(BITS sel)
2452 {
2453 if (sel > 255 && sel < 512)
2454 {
2455 return 1;
2456 }
2457 return 0;
2458 }
2459
2460 int is_const(BITS sel)
2461 {
2462 if (is_cfile(sel))
2463 {
2464 return 1;
2465 }
2466 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
2467 {
2468 return 1;
2469 }
2470 return 0;
2471 }
2472
2473 int is_gpr(BITS sel)
2474 {
2475 if (sel >= 0 && sel < 128)
2476 {
2477 return 1;
2478 }
2479 return 0;
2480 }
2481
2482 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
2483 SQ_ALU_VEC_120, //001
2484 SQ_ALU_VEC_102, //010
2485
2486 SQ_ALU_VEC_201, //011
2487 SQ_ALU_VEC_012, //100
2488 SQ_ALU_VEC_021, //101
2489
2490 SQ_ALU_VEC_012, //110
2491 SQ_ALU_VEC_012}; //111
2492
2493 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
2494 SQ_ALU_SCL_122, //001
2495 SQ_ALU_SCL_122, //010
2496
2497 SQ_ALU_SCL_221, //011
2498 SQ_ALU_SCL_212, //100
2499 SQ_ALU_SCL_122, //101
2500
2501 SQ_ALU_SCL_122, //110
2502 SQ_ALU_SCL_122}; //111
2503
2504 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
2505 GLuint sel,
2506 GLuint chan)
2507 {
2508 int res_match = (-1);
2509 int res_empty = (-1);
2510
2511 GLint res;
2512
2513 for (res=3; res>=0; res--)
2514 {
2515 if(pAsm->hw_cfile_addr[ res] < 0)
2516 {
2517 res_empty = res;
2518 }
2519 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
2520 &&
2521 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
2522 {
2523 res_match = res;
2524 }
2525 }
2526
2527 if(res_match >= 0)
2528 {
2529 // Read for this scalar component already reserved, nothing to do here.
2530 ;
2531 }
2532 else if(res_empty >= 0)
2533 {
2534 pAsm->hw_cfile_addr[ res_empty ] = sel;
2535 pAsm->hw_cfile_chan[ res_empty ] = chan;
2536 }
2537 else
2538 {
2539 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2540 return GL_FALSE;
2541 }
2542 return GL_TRUE;
2543 }
2544
2545 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
2546 {
2547 if(pAsm->hw_gpr[cycle][chan] < 0)
2548 {
2549 pAsm->hw_gpr[cycle][chan] = sel;
2550 }
2551 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
2552 {
2553 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2554 return GL_FALSE;
2555 }
2556
2557 return GL_TRUE;
2558 }
2559
2560 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2561 {
2562 switch (swiz)
2563 {
2564 case SQ_ALU_SCL_210:
2565 {
2566 int table[3] = {2, 1, 0};
2567 *pCycle = table[sel];
2568 return GL_TRUE;
2569 }
2570 break;
2571 case SQ_ALU_SCL_122:
2572 {
2573 int table[3] = {1, 2, 2};
2574 *pCycle = table[sel];
2575 return GL_TRUE;
2576 }
2577 break;
2578 case SQ_ALU_SCL_212:
2579 {
2580 int table[3] = {2, 1, 2};
2581 *pCycle = table[sel];
2582 return GL_TRUE;
2583 }
2584 break;
2585 case SQ_ALU_SCL_221:
2586 {
2587 int table[3] = {2, 2, 1};
2588 *pCycle = table[sel];
2589 return GL_TRUE;
2590 }
2591 break;
2592 default:
2593 radeon_error("Bad Scalar bank swizzle value\n");
2594 break;
2595 }
2596
2597 return GL_FALSE;
2598 }
2599
2600 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2601 {
2602 switch (swiz)
2603 {
2604 case SQ_ALU_VEC_012:
2605 {
2606 int table[3] = {0, 1, 2};
2607 *pCycle = table[sel];
2608 }
2609 break;
2610 case SQ_ALU_VEC_021:
2611 {
2612 int table[3] = {0, 2, 1};
2613 *pCycle = table[sel];
2614 }
2615 break;
2616 case SQ_ALU_VEC_120:
2617 {
2618 int table[3] = {1, 2, 0};
2619 *pCycle = table[sel];
2620 }
2621 break;
2622 case SQ_ALU_VEC_102:
2623 {
2624 int table[3] = {1, 0, 2};
2625 *pCycle = table[sel];
2626 }
2627 break;
2628 case SQ_ALU_VEC_201:
2629 {
2630 int table[3] = {2, 0, 1};
2631 *pCycle = table[sel];
2632 }
2633 break;
2634 case SQ_ALU_VEC_210:
2635 {
2636 int table[3] = {2, 1, 0};
2637 *pCycle = table[sel];
2638 }
2639 break;
2640 default:
2641 radeon_error("Bad Vec bank swizzle value\n");
2642 return GL_FALSE;
2643 break;
2644 }
2645
2646 return GL_TRUE;
2647 }
2648
2649 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2650 R700ALUInstruction* alu_instruction_ptr)
2651 {
2652 GLuint cycle;
2653 GLuint bank_swizzle;
2654 GLuint const_count = 0;
2655
2656 BITS sel;
2657 BITS chan;
2658 BITS rel;
2659 BITS neg;
2660
2661 GLuint src;
2662
2663 BITS src_sel [3] = {0,0,0};
2664 BITS src_chan[3] = {0,0,0};
2665 BITS src_rel [3] = {0,0,0};
2666 BITS src_neg [3] = {0,0,0};
2667
2668 GLuint swizzle_key;
2669 GLuint number_of_operands;
2670
2671 if(8 == pAsm->unAsic)
2672 {
2673 number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2674 }
2675 else
2676 {
2677 number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2678 }
2679
2680 for (src=0; src<number_of_operands; src++)
2681 {
2682 get_src_properties(alu_instruction_ptr,
2683 src,
2684 &(src_sel[src]),
2685 &(src_rel[src]),
2686 &(src_chan[src]),
2687 &(src_neg[src]) );
2688 }
2689
2690
2691 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2692 (is_const( src_sel[1] ) ? 2 : 0) +
2693 (is_const( src_sel[2] ) ? 1 : 0) );
2694
2695 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2696
2697 for (src=0; src<number_of_operands; src++)
2698 {
2699 sel = src_sel [src];
2700 chan = src_chan[src];
2701 rel = src_rel [src];
2702 neg = src_neg [src];
2703
2704 if (is_const( sel ))
2705 {
2706 // Any constant, including literal and inline constants
2707 const_count++;
2708
2709 if (is_cfile( sel ))
2710 {
2711 reserve_cfile(pAsm, sel, chan);
2712 }
2713
2714 }
2715 }
2716
2717 for (src=0; src<number_of_operands; src++)
2718 {
2719 sel = src_sel [src];
2720 chan = src_chan[src];
2721 rel = src_rel [src];
2722 neg = src_neg [src];
2723
2724 if( is_gpr(sel) )
2725 {
2726 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2727
2728 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2729 {
2730 return GL_FALSE;
2731 }
2732
2733 if(cycle < const_count)
2734 {
2735 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2736 {
2737 return GL_FALSE;
2738 }
2739 }
2740 }
2741 }
2742
2743 return GL_TRUE;
2744 }
2745
2746 GLboolean check_vector(r700_AssemblerBase* pAsm,
2747 R700ALUInstruction* alu_instruction_ptr)
2748 {
2749 GLuint cycle;
2750 GLuint bank_swizzle;
2751 GLuint const_count = 0;
2752
2753 GLuint src;
2754
2755 BITS sel;
2756 BITS chan;
2757 BITS rel;
2758 BITS neg;
2759
2760 BITS src_sel [3] = {0,0,0};
2761 BITS src_chan[3] = {0,0,0};
2762 BITS src_rel [3] = {0,0,0};
2763 BITS src_neg [3] = {0,0,0};
2764
2765 GLuint swizzle_key;
2766 GLuint number_of_operands;
2767
2768 if(8 == pAsm->unAsic)
2769 {
2770 number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2771 }
2772 else
2773 {
2774 number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2775 }
2776
2777 for (src=0; src<number_of_operands; src++)
2778 {
2779 get_src_properties(alu_instruction_ptr,
2780 src,
2781 &(src_sel[src]),
2782 &(src_rel[src]),
2783 &(src_chan[src]),
2784 &(src_neg[src]) );
2785 }
2786
2787
2788 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2789 (is_const( src_sel[1] ) ? 2 : 0) +
2790 (is_const( src_sel[2] ) ? 1 : 0)
2791 );
2792
2793 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2794
2795 for (src=0; src<number_of_operands; src++)
2796 {
2797 sel = src_sel [src];
2798 chan = src_chan[src];
2799 rel = src_rel [src];
2800 neg = src_neg [src];
2801
2802
2803 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2804
2805 if( is_gpr(sel) )
2806 {
2807 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2808 {
2809 return GL_FALSE;
2810 }
2811
2812 if ( (src == 1) &&
2813 (sel == src_sel[0]) &&
2814 (chan == src_chan[0]) )
2815 {
2816 }
2817 else
2818 {
2819 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2820 {
2821 return GL_FALSE;
2822 }
2823 }
2824 }
2825 else if( is_const(sel) )
2826 {
2827 const_count++;
2828
2829 if( is_cfile(sel) )
2830 {
2831 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2832 {
2833 return GL_FALSE;
2834 }
2835 }
2836 }
2837 }
2838
2839 return GL_TRUE;
2840 }
2841
2842 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2843 {
2844 R700ALUInstruction * alu_instruction_ptr = NULL;
2845 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2846 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2847
2848 GLuint number_of_scalar_operations;
2849 GLboolean is_single_scalar_operation;
2850 GLuint scalar_channel_index;
2851
2852 PVSSRC * pcurrent_source;
2853 int current_source_index;
2854 GLuint contiguous_slots_needed;
2855 GLuint uNumSrc;
2856 GLboolean bSplitInst;
2857
2858 if(8 == pAsm->unAsic)
2859 {
2860 uNumSrc = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2861 }
2862 else
2863 {
2864 uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2865 }
2866
2867 //GLuint channel_swizzle, j;
2868 //GLuint chan_counter[4] = {0, 0, 0, 0};
2869 //PVSSRC * pSource[3];
2870 bSplitInst = GL_FALSE;
2871 pAsm->kcacheUsed = 0;
2872
2873 if (1 == pAsm->D.dst.math)
2874 {
2875 is_single_scalar_operation = GL_TRUE;
2876 number_of_scalar_operations = 1;
2877 }
2878 else
2879 {
2880 is_single_scalar_operation = GL_FALSE;
2881 number_of_scalar_operations = 4;
2882
2883 /* current assembler doesn't do more than 1 register per source */
2884 #if 0
2885 /* check read port, only very preliminary algorithm, not count in
2886 src0/1 same comp case and prev slot repeat case; also not count relative
2887 addressing. TODO: improve performance. */
2888 for(j=0; j<uNumSrc; j++)
2889 {
2890 pSource[j] = &(pAsm->S[j].src);
2891 }
2892 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2893 {
2894 for(j=0; j<uNumSrc; j++)
2895 {
2896 switch (scalar_channel_index)
2897 {
2898 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2899 case 1: channel_swizzle = pSource[j]->swizzley; break;
2900 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2901 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2902 default: channel_swizzle = SQ_SEL_MASK; break;
2903 }
2904 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2905 (pSource[j]->rtype == SRC_REG_GPR))
2906 && (channel_swizzle <= SQ_SEL_W) )
2907 {
2908 chan_counter[channel_swizzle]++;
2909 }
2910 }
2911 }
2912 if( (chan_counter[SQ_SEL_X] > 3)
2913 || (chan_counter[SQ_SEL_Y] > 3)
2914 || (chan_counter[SQ_SEL_Z] > 3)
2915 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2916 {
2917 bSplitInst = GL_TRUE;
2918 }
2919 #endif
2920 }
2921
2922 contiguous_slots_needed = 0;
2923
2924 if(!is_single_scalar_operation)
2925 {
2926 contiguous_slots_needed = 4;
2927 }
2928
2929 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2930
2931 initialize(pAsm);
2932
2933 for (scalar_channel_index=0;
2934 scalar_channel_index < number_of_scalar_operations;
2935 scalar_channel_index++)
2936 {
2937 if(scalar_channel_index == (number_of_scalar_operations-1))
2938 {
2939 switch(pAsm->D2.dst2.literal_slots)
2940 {
2941 case 0:
2942 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2943 Init_R700ALUInstruction(alu_instruction_ptr);
2944 break;
2945 case 1:
2946 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2947 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2948 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2949 break;
2950 case 2:
2951 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2952 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2953 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2954 break;
2955 };
2956 }
2957 else
2958 {
2959 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2960 Init_R700ALUInstruction(alu_instruction_ptr);
2961 }
2962
2963 //src 0
2964 current_source_index = 0;
2965 pcurrent_source = &(pAsm->S[0].src);
2966
2967 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2968 current_source_index,
2969 pcurrent_source,
2970 scalar_channel_index,
2971 pAsm) )
2972 {
2973 return GL_FALSE;
2974 }
2975
2976 if (uNumSrc > 1)
2977 {
2978 // Process source 1
2979 current_source_index = 1;
2980 pcurrent_source = &(pAsm->S[current_source_index].src);
2981
2982 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2983 current_source_index,
2984 pcurrent_source,
2985 scalar_channel_index,
2986 pAsm) )
2987 {
2988 return GL_FALSE;
2989 }
2990 }
2991
2992 //other bits
2993 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
2994
2995 if( (is_single_scalar_operation == GL_TRUE)
2996 || (GL_TRUE == bSplitInst) )
2997 {
2998 alu_instruction_ptr->m_Word0.f.last = 1;
2999 }
3000 else
3001 {
3002 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
3003 }
3004
3005 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
3006 if(1 == pAsm->D.dst.predicated)
3007 {
3008 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
3009 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
3010 }
3011 else
3012 {
3013 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3014 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3015 }
3016
3017 // dst
3018 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
3019 (pAsm->D.dst.rtype == DST_REG_OUT) )
3020 {
3021 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
3022 }
3023 else
3024 {
3025 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
3026 return GL_FALSE;
3027 }
3028
3029 if ( ADDR_RELATIVE_A0 == addrmode_PVSDST(&(pAsm->D.dst)) )
3030 {
3031 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_RELATIVE;
3032 }
3033 else
3034 {
3035 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
3036 }
3037
3038 if ( is_single_scalar_operation == GL_TRUE )
3039 {
3040 // Override scalar_channel_index since only one scalar value will be written
3041 if(pAsm->D.dst.writex)
3042 {
3043 scalar_channel_index = 0;
3044 }
3045 else if(pAsm->D.dst.writey)
3046 {
3047 scalar_channel_index = 1;
3048 }
3049 else if(pAsm->D.dst.writez)
3050 {
3051 scalar_channel_index = 2;
3052 }
3053 else if(pAsm->D.dst.writew)
3054 {
3055 scalar_channel_index = 3;
3056 }
3057 }
3058
3059 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
3060
3061 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
3062
3063 if (pAsm->D.dst.op3)
3064 {
3065 //op3
3066
3067 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
3068
3069 //There's 3rd src for op3
3070 current_source_index = 2;
3071 pcurrent_source = &(pAsm->S[current_source_index].src);
3072
3073 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
3074 current_source_index,
3075 pcurrent_source,
3076 scalar_channel_index,
3077 pAsm) )
3078 {
3079 return GL_FALSE;
3080 }
3081 }
3082 else
3083 {
3084 //op2
3085 if (pAsm->bR6xx)
3086 {
3087 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
3088
3089 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
3090 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
3091
3092 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
3093 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
3094 switch (scalar_channel_index)
3095 {
3096 case 0:
3097 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
3098 break;
3099 case 1:
3100 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
3101 break;
3102 case 2:
3103 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
3104 break;
3105 case 3:
3106 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
3107 break;
3108 default:
3109 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
3110 break;
3111 }
3112 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
3113 }
3114 else
3115 {
3116 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
3117
3118 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
3119 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
3120
3121 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3122 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3123 switch (scalar_channel_index)
3124 {
3125 case 0:
3126 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
3127 break;
3128 case 1:
3129 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
3130 break;
3131 case 2:
3132 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
3133 break;
3134 case 3:
3135 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
3136 break;
3137 default:
3138 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
3139 break;
3140 }
3141 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
3142 }
3143 }
3144
3145 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
3146 {
3147 return GL_FALSE;
3148 }
3149
3150 /*
3151 * Judge the type of current instruction, is it vector or scalar
3152 * instruction.
3153 */
3154 if (is_single_scalar_operation)
3155 {
3156 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
3157 {
3158 return GL_FALSE;
3159 }
3160 }
3161 else
3162 {
3163 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
3164 {
3165 return GL_FALSE;
3166 }
3167 }
3168
3169 contiguous_slots_needed -= 1;
3170 }
3171
3172 return GL_TRUE;
3173 }
3174
3175 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3176 {
3177 BITS tmp;
3178
3179 checkop1(pAsm);
3180
3181 tmp = gethelpr(pAsm);
3182
3183 // opcode tmp.x, a.x
3184 // MOV dst, tmp.x
3185
3186 pAsm->D.dst.opcode = opcode;
3187 pAsm->D.dst.math = 1;
3188
3189 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3190 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3191 pAsm->D.dst.reg = tmp;
3192 pAsm->D.dst.writex = 1;
3193
3194 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3195 {
3196 return GL_FALSE;
3197 }
3198
3199 if( pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_RSQ )
3200 pAsm->S[0].src.abs = 1;
3201
3202 if ( GL_FALSE == next_ins(pAsm) )
3203 {
3204 return GL_FALSE;
3205 }
3206
3207 // Now replicate result to all necessary channels in destination
3208 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3209
3210 if( GL_FALSE == assemble_dst(pAsm) )
3211 {
3212 return GL_FALSE;
3213 }
3214
3215 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3216 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3217 pAsm->S[0].src.reg = tmp;
3218
3219 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3220 noneg_PVSSRC(&(pAsm->S[0].src));
3221
3222 if( GL_FALSE == next_ins(pAsm) )
3223 {
3224 return GL_FALSE;
3225 }
3226
3227 return GL_TRUE;
3228 }
3229
3230 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3231 {
3232 checkop1(pAsm);
3233
3234 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3235
3236 if( GL_FALSE == assemble_dst(pAsm) )
3237 {
3238 return GL_FALSE;
3239 }
3240 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3241 {
3242 return GL_FALSE;
3243 }
3244
3245 pAsm->S[1].bits = pAsm->S[0].bits;
3246 flipneg_PVSSRC(&(pAsm->S[1].src));
3247
3248 if ( GL_FALSE == next_ins(pAsm) )
3249 {
3250 return GL_FALSE;
3251 }
3252
3253 return GL_TRUE;
3254 }
3255
3256 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3257 {
3258 if( GL_FALSE == checkop2(pAsm) )
3259 {
3260 return GL_FALSE;
3261 }
3262
3263 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3264
3265 if( GL_FALSE == assemble_dst(pAsm) )
3266 {
3267 return GL_FALSE;
3268 }
3269
3270 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3271 {
3272 return GL_FALSE;
3273 }
3274
3275 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3276 {
3277 return GL_FALSE;
3278 }
3279
3280 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3281 {
3282 flipneg_PVSSRC(&(pAsm->S[1].src));
3283 }
3284
3285 if( GL_FALSE == next_ins(pAsm) )
3286 {
3287 return GL_FALSE;
3288 }
3289
3290 return GL_TRUE;
3291 }
3292
3293 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3294 { /* TODO: ar values dont' persist between clauses */
3295 if( GL_FALSE == checkop1(pAsm) )
3296 {
3297 return GL_FALSE;
3298 }
3299
3300 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3301 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3302 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3303 pAsm->D.dst.reg = 0;
3304 pAsm->D.dst.writex = 0;
3305 pAsm->D.dst.writey = 0;
3306 pAsm->D.dst.writez = 0;
3307 pAsm->D.dst.writew = 0;
3308
3309 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3310 {
3311 return GL_FALSE;
3312 }
3313
3314 if( GL_FALSE == next_ins(pAsm) )
3315 {
3316 return GL_FALSE;
3317 }
3318
3319 return GL_TRUE;
3320 }
3321
3322 GLboolean assemble_BAD(char *opcode_str)
3323 {
3324 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
3325 return GL_FALSE;
3326 }
3327
3328 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3329 {
3330 int tmp;
3331
3332 if( GL_FALSE == checkop3(pAsm) )
3333 {
3334 return GL_FALSE;
3335 }
3336
3337 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3338 pAsm->D.dst.op3 = 1;
3339
3340 tmp = (-1);
3341
3342 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3343 {
3344 //OP3 has no support for write mask
3345 tmp = gethelpr(pAsm);
3346
3347 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3348 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3349 pAsm->D.dst.reg = tmp;
3350
3351 nomask_PVSDST(&(pAsm->D.dst));
3352 }
3353 else
3354 {
3355 if( GL_FALSE == assemble_dst(pAsm) )
3356 {
3357 return GL_FALSE;
3358 }
3359 }
3360
3361 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3362 {
3363 return GL_FALSE;
3364 }
3365
3366 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
3367 {
3368 return GL_FALSE;
3369 }
3370
3371 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
3372 {
3373 return GL_FALSE;
3374 }
3375
3376 if ( GL_FALSE == next_ins(pAsm) )
3377 {
3378 return GL_FALSE;
3379 }
3380
3381 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3382 {
3383 if( GL_FALSE == assemble_dst(pAsm) )
3384 {
3385 return GL_FALSE;
3386 }
3387
3388 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3389
3390 //tmp for source
3391 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3392 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3393 pAsm->S[0].src.reg = tmp;
3394
3395 noneg_PVSSRC(&(pAsm->S[0].src));
3396 noswizzle_PVSSRC(&(pAsm->S[0].src));
3397
3398 if( GL_FALSE == next_ins(pAsm) )
3399 {
3400 return GL_FALSE;
3401 }
3402 }
3403
3404 return GL_TRUE;
3405 }
3406
3407 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
3408 {
3409 /*
3410 * r600 - trunc to -PI..PI range
3411 * r700 - normalize by dividing by 2PI
3412 * see fdo bug 27901
3413 */
3414
3415 int tmp;
3416 checkop1(pAsm);
3417
3418 tmp = gethelpr(pAsm);
3419
3420 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3421 pAsm->D.dst.op3 = 1;
3422
3423 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3424 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3425 pAsm->D.dst.reg = tmp;
3426
3427 assemble_src(pAsm, 0, -1);
3428
3429 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3430 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3431
3432 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3433 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3434
3435 pAsm->D2.dst2.literal_slots = 1;
3436 pAsm->C[0].f = 1/(3.1415926535 * 2);
3437 pAsm->C[1].f = 0.5f;
3438
3439 if ( GL_FALSE == next_ins(pAsm) )
3440 {
3441 return GL_FALSE;
3442 }
3443
3444 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3445
3446 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3447 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3448 pAsm->D.dst.reg = tmp;
3449 pAsm->D.dst.writex = 1;
3450
3451 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3452 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3453 pAsm->S[0].src.reg = tmp;
3454 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3455
3456 if(( GL_FALSE == next_ins(pAsm) ))
3457 {
3458 return GL_FALSE;
3459 }
3460 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3461 pAsm->D.dst.op3 = 1;
3462
3463 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3464 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3465 pAsm->D.dst.reg = tmp;
3466
3467 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3468 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3469 pAsm->S[0].src.reg = tmp;
3470 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3471
3472 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3473 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3474
3475 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3476 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3477
3478 pAsm->D2.dst2.literal_slots = 1;
3479
3480 if (pAsm->bR6xx)
3481 {
3482 pAsm->C[0].f = 3.1415926535897f * 2.0f;
3483 pAsm->C[1].f = -3.1415926535897f;
3484 }
3485 else
3486 {
3487 pAsm->C[0].f = 1.0f;
3488 pAsm->C[1].f = -0.5f;
3489 }
3490
3491 if(( GL_FALSE == next_ins(pAsm) ))
3492 {
3493 return GL_FALSE;
3494 }
3495
3496 pAsm->D.dst.opcode = opcode;
3497 pAsm->D.dst.math = 1;
3498
3499 assemble_dst(pAsm);
3500
3501 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3502 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3503 pAsm->S[0].src.reg = tmp;
3504 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3505 noneg_PVSSRC(&(pAsm->S[0].src));
3506
3507 next_ins(pAsm);
3508
3509 //TODO - replicate if more channels set in WriteMask
3510 return GL_TRUE;
3511
3512 }
3513
3514 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3515 {
3516 if( GL_FALSE == checkop2(pAsm) )
3517 {
3518 return GL_FALSE;
3519 }
3520
3521 if(8 == pAsm->unAsic)
3522 {
3523 pAsm->D.dst.opcode = EG_OP2_INST_DOT4;
3524 }
3525 else
3526 {
3527 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
3528 }
3529
3530 if( GL_FALSE == assemble_dst(pAsm) )
3531 {
3532 return GL_FALSE;
3533 }
3534
3535 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3536 {
3537 return GL_FALSE;
3538 }
3539
3540 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3541 {
3542 return GL_FALSE;
3543 }
3544
3545 if(OPCODE_DP2 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3546 {
3547 zerocomp_PVSSRC(&(pAsm->S[0].src),2);
3548 zerocomp_PVSSRC(&(pAsm->S[0].src),3);
3549 zerocomp_PVSSRC(&(pAsm->S[1].src),2);
3550 zerocomp_PVSSRC(&(pAsm->S[1].src),3);
3551 }
3552 else if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3553 {
3554 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3555 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3556 }
3557 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3558 {
3559 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3560 }
3561
3562 if ( GL_FALSE == next_ins(pAsm) )
3563 {
3564 return GL_FALSE;
3565 }
3566
3567 return GL_TRUE;
3568 }
3569
3570 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3571 {
3572 if( GL_FALSE == checkop2(pAsm) )
3573 {
3574 return GL_FALSE;
3575 }
3576
3577 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3578
3579 if( GL_FALSE == assemble_dst(pAsm) )
3580 {
3581 return GL_FALSE;
3582 }
3583
3584 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3585 {
3586 return GL_FALSE;
3587 }
3588
3589 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3590 {
3591 return GL_FALSE;
3592 }
3593
3594 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3595 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3596
3597 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3598 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3599
3600 if ( GL_FALSE == next_ins(pAsm) )
3601 {
3602 return GL_FALSE;
3603 }
3604
3605 return GL_TRUE;
3606 }
3607
3608 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3609 {
3610 if(8 == pAsm->unAsic)
3611 {
3612 return assemble_math_function(pAsm, EG_OP2_INST_EXP_IEEE);
3613 }
3614
3615 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3616 }
3617
3618 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3619 {
3620 BITS tmp;
3621
3622 checkop1(pAsm);
3623
3624 tmp = gethelpr(pAsm);
3625
3626 // FLOOR tmp.x, a.x
3627 // EX2 dst.x tmp.x
3628
3629 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3630 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3631
3632 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3633 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3634 pAsm->D.dst.reg = tmp;
3635 pAsm->D.dst.writex = 1;
3636
3637 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3638 {
3639 return GL_FALSE;
3640 }
3641
3642 if( GL_FALSE == next_ins(pAsm) )
3643 {
3644 return GL_FALSE;
3645 }
3646
3647 if(8 == pAsm->unAsic)
3648 {
3649 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3650 }
3651 else
3652 {
3653 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3654 }
3655 pAsm->D.dst.math = 1;
3656
3657 if( GL_FALSE == assemble_dst(pAsm) )
3658 {
3659 return GL_FALSE;
3660 }
3661
3662 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3663
3664 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3665 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3666 pAsm->S[0].src.reg = tmp;
3667
3668 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3669 noneg_PVSSRC(&(pAsm->S[0].src));
3670
3671 if( GL_FALSE == next_ins(pAsm) )
3672 {
3673 return GL_FALSE;
3674 }
3675 }
3676
3677 // FRACT dst.y a.x
3678
3679 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3680 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3681
3682 if( GL_FALSE == assemble_dst(pAsm) )
3683 {
3684 return GL_FALSE;
3685 }
3686
3687 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3688 {
3689 return GL_FALSE;
3690 }
3691
3692 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3693
3694 if( GL_FALSE == next_ins(pAsm) )
3695 {
3696 return GL_FALSE;
3697 }
3698 }
3699
3700 // EX2 dst.z, a.x
3701
3702 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3703 if(8 == pAsm->unAsic)
3704 {
3705 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3706 }
3707 else
3708 {
3709 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3710 }
3711 pAsm->D.dst.math = 1;
3712
3713 if( GL_FALSE == assemble_dst(pAsm) )
3714 {
3715 return GL_FALSE;
3716 }
3717
3718 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3719 {
3720 return GL_FALSE;
3721 }
3722
3723 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3724
3725 if( GL_FALSE == next_ins(pAsm) )
3726 {
3727 return GL_FALSE;
3728 }
3729 }
3730
3731 // MOV dst.w 1.0
3732
3733 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3734 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3735
3736 if( GL_FALSE == assemble_dst(pAsm) )
3737 {
3738 return GL_FALSE;
3739 }
3740
3741 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3742
3743 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3744 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3745 pAsm->S[0].src.reg = tmp;
3746
3747 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3748 noneg_PVSSRC(&(pAsm->S[0].src));
3749
3750 if( GL_FALSE == next_ins(pAsm) )
3751 {
3752 return GL_FALSE;
3753 }
3754 }
3755
3756 return GL_TRUE;
3757 }
3758
3759 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3760 {
3761 checkop1(pAsm);
3762
3763 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3764
3765 if ( GL_FALSE == assemble_dst(pAsm) )
3766 {
3767 return GL_FALSE;
3768 }
3769
3770 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3771 {
3772 return GL_FALSE;
3773 }
3774
3775 if ( GL_FALSE == next_ins(pAsm) )
3776 {
3777 return GL_FALSE;
3778 }
3779
3780 return GL_TRUE;
3781 }
3782
3783 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3784 {
3785 if(8 == pAsm->unAsic)
3786 {
3787 return assemble_math_function(pAsm, EG_OP2_INST_FLT_TO_INT);
3788 }
3789
3790 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3791 }
3792
3793 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3794 {
3795 checkop1(pAsm);
3796
3797 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3798
3799 if ( GL_FALSE == assemble_dst(pAsm) )
3800 {
3801 return GL_FALSE;
3802 }
3803
3804 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3805 {
3806 return GL_FALSE;
3807 }
3808
3809 if ( GL_FALSE == next_ins(pAsm) )
3810 {
3811 return GL_FALSE;
3812 }
3813
3814 return GL_TRUE;
3815 }
3816
3817 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3818 {
3819 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3820
3821 if(pILInst->Opcode == OPCODE_KIL)
3822 checkop1(pAsm);
3823
3824 pAsm->D.dst.opcode = opcode;
3825 //pAsm->D.dst.math = 1;
3826
3827 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3828 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3829 pAsm->D.dst.reg = 0;
3830 pAsm->D.dst.writex = 0;
3831 pAsm->D.dst.writey = 0;
3832 pAsm->D.dst.writez = 0;
3833 pAsm->D.dst.writew = 0;
3834
3835 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3836 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3837 pAsm->S[0].src.reg = 0;
3838 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3839 noneg_PVSSRC(&(pAsm->S[0].src));
3840
3841 if(pILInst->Opcode == OPCODE_KIL_NV)
3842 {
3843 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3844 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3845 pAsm->S[1].src.reg = 0;
3846 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3847 neg_PVSSRC(&(pAsm->S[1].src));
3848 }
3849 else
3850 {
3851 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3852 {
3853 return GL_FALSE;
3854 }
3855
3856 }
3857
3858 if ( GL_FALSE == next_ins(pAsm) )
3859 {
3860 return GL_FALSE;
3861 }
3862
3863 /* Doc says KILL has to be last(end) ALU clause */
3864 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3865 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3866
3867 return GL_TRUE;
3868 }
3869
3870 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3871 {
3872 if(8 == pAsm->unAsic)
3873 {
3874 return assemble_math_function(pAsm, EG_OP2_INST_LOG_IEEE);
3875 }
3876
3877 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3878 }
3879
3880 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3881 {
3882 BITS tmp;
3883
3884 if( GL_FALSE == checkop3(pAsm) )
3885 {
3886 return GL_FALSE;
3887 }
3888
3889 tmp = gethelpr(pAsm);
3890
3891 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3892
3893 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3894 pAsm->D.dst.reg = tmp;
3895 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3896 nomask_PVSDST(&(pAsm->D.dst));
3897
3898
3899 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3900 {
3901 return GL_FALSE;
3902 }
3903
3904 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3905 {
3906 return GL_FALSE;
3907 }
3908
3909 neg_PVSSRC(&(pAsm->S[1].src));
3910
3911 if( GL_FALSE == next_ins(pAsm) )
3912 {
3913 return GL_FALSE;
3914 }
3915
3916 if(8 == pAsm->unAsic)
3917 {
3918 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3919 }
3920 else
3921 {
3922 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3923 }
3924 pAsm->D.dst.op3 = 1;
3925
3926 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3927 pAsm->D.dst.reg = tmp;
3928 nomask_PVSDST(&(pAsm->D.dst));
3929 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3930
3931 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3932 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3933 pAsm->S[0].src.reg = tmp;
3934 noswizzle_PVSSRC(&(pAsm->S[0].src));
3935
3936
3937 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3938 {
3939 return GL_FALSE;
3940 }
3941
3942 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3943 {
3944 return GL_FALSE;
3945 }
3946
3947 if( GL_FALSE == next_ins(pAsm) )
3948 {
3949 return GL_FALSE;
3950 }
3951
3952 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3953
3954 if( GL_FALSE == assemble_dst(pAsm) )
3955 {
3956 return GL_FALSE;
3957 }
3958
3959 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3960 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3961 pAsm->S[0].src.reg = tmp;
3962 noswizzle_PVSSRC(&(pAsm->S[0].src));
3963
3964 if( GL_FALSE == next_ins(pAsm) )
3965 {
3966 return GL_FALSE;
3967 }
3968
3969 return GL_TRUE;
3970 }
3971
3972 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3973 {
3974 BITS tmp1, tmp2, tmp3;
3975
3976 checkop1(pAsm);
3977
3978 tmp1 = gethelpr(pAsm);
3979 tmp2 = gethelpr(pAsm);
3980 tmp3 = gethelpr(pAsm);
3981
3982 // FIXME: The hardware can do fabs() directly on input
3983 // elements, but the compiler doesn't have the
3984 // capability to use that.
3985
3986 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3987
3988 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3989
3990 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3991 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3992 pAsm->D.dst.reg = tmp1;
3993 pAsm->D.dst.writex = 1;
3994
3995 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3996 {
3997 return GL_FALSE;
3998 }
3999
4000 pAsm->S[1].bits = pAsm->S[0].bits;
4001 flipneg_PVSSRC(&(pAsm->S[1].src));
4002
4003 if ( GL_FALSE == next_ins(pAsm) )
4004 {
4005 return GL_FALSE;
4006 }
4007
4008 // Entire algo:
4009 //
4010 // LG2 tmp2.x, tmp1.x
4011 // FLOOR tmp3.x, tmp2.x
4012 // MOV dst.x, tmp3.x
4013 // ADD tmp3.x, tmp2.x, -tmp3.x
4014 // EX2 dst.y, tmp3.x
4015 // MOV dst.z, tmp2.x
4016 // MOV dst.w, 1.0
4017
4018 // LG2 tmp2.x, tmp1.x
4019 // FLOOR tmp3.x, tmp2.x
4020
4021 if(8 == pAsm->unAsic)
4022 {
4023 pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4024 }
4025 else
4026 {
4027 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4028 }
4029 pAsm->D.dst.math = 1;
4030
4031 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4032 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4033 pAsm->D.dst.reg = tmp2;
4034 pAsm->D.dst.writex = 1;
4035
4036 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4037 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4038 pAsm->S[0].src.reg = tmp1;
4039
4040 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4041 noneg_PVSSRC(&(pAsm->S[0].src));
4042
4043 if( GL_FALSE == next_ins(pAsm) )
4044 {
4045 return GL_FALSE;
4046 }
4047
4048 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
4049
4050 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4051 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4052 pAsm->D.dst.reg = tmp3;
4053 pAsm->D.dst.writex = 1;
4054
4055 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4056 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4057 pAsm->S[0].src.reg = tmp2;
4058
4059 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4060 noneg_PVSSRC(&(pAsm->S[0].src));
4061
4062 if( GL_FALSE == next_ins(pAsm) )
4063 {
4064 return GL_FALSE;
4065 }
4066
4067 // MOV dst.x, tmp3.x
4068
4069 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4070
4071 if( GL_FALSE == assemble_dst(pAsm) )
4072 {
4073 return GL_FALSE;
4074 }
4075
4076 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4077
4078 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4079 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4080 pAsm->S[0].src.reg = tmp3;
4081
4082 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4083 noneg_PVSSRC(&(pAsm->S[0].src));
4084
4085 if( GL_FALSE == next_ins(pAsm) )
4086 {
4087 return GL_FALSE;
4088 }
4089
4090 // ADD tmp3.x, tmp2.x, -tmp3.x
4091 // EX2 dst.y, tmp3.x
4092
4093 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4094
4095 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4096 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4097 pAsm->D.dst.reg = tmp3;
4098 pAsm->D.dst.writex = 1;
4099
4100 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4101 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4102 pAsm->S[0].src.reg = tmp2;
4103
4104 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4105 noneg_PVSSRC(&(pAsm->S[0].src));
4106
4107 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4108 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
4109 pAsm->S[1].src.reg = tmp3;
4110
4111 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4112 neg_PVSSRC(&(pAsm->S[1].src));
4113
4114 if( GL_FALSE == next_ins(pAsm) )
4115 {
4116 return GL_FALSE;
4117 }
4118
4119 if(8 == pAsm->unAsic)
4120 {
4121 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4122 }
4123 else
4124 {
4125 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4126 }
4127 pAsm->D.dst.math = 1;
4128
4129 if( GL_FALSE == assemble_dst(pAsm) )
4130 {
4131 return GL_FALSE;
4132 }
4133
4134 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4135
4136 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4137 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4138 pAsm->S[0].src.reg = tmp3;
4139
4140 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4141 noneg_PVSSRC(&(pAsm->S[0].src));
4142
4143 if( GL_FALSE == next_ins(pAsm) )
4144 {
4145 return GL_FALSE;
4146 }
4147
4148 // MOV dst.z, tmp2.x
4149
4150 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4151
4152 if( GL_FALSE == assemble_dst(pAsm) )
4153 {
4154 return GL_FALSE;
4155 }
4156
4157 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
4158
4159 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4160 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4161 pAsm->S[0].src.reg = tmp2;
4162
4163 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4164 noneg_PVSSRC(&(pAsm->S[0].src));
4165
4166 if( GL_FALSE == next_ins(pAsm) )
4167 {
4168 return GL_FALSE;
4169 }
4170
4171 // MOV dst.w 1.0
4172
4173 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4174
4175 if( GL_FALSE == assemble_dst(pAsm) )
4176 {
4177 return GL_FALSE;
4178 }
4179
4180 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
4181
4182 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4183 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4184 pAsm->S[0].src.reg = tmp1;
4185
4186 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
4187 noneg_PVSSRC(&(pAsm->S[0].src));
4188
4189 if( GL_FALSE == next_ins(pAsm) )
4190 {
4191 return GL_FALSE;
4192 }
4193
4194 return GL_TRUE;
4195 }
4196
4197 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
4198 {
4199 int tmp, ii;
4200 GLboolean bReplaceDst = GL_FALSE;
4201 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4202
4203 if( GL_FALSE == checkop3(pAsm) )
4204 {
4205 return GL_FALSE;
4206 }
4207
4208 if(8 == pAsm->unAsic)
4209 {
4210 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4211 }
4212 else
4213 {
4214 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4215 }
4216 pAsm->D.dst.op3 = 1;
4217
4218 tmp = (-1);
4219
4220 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
4221 { /* TODO : more investigation on MAD src and dst using same register */
4222 for(ii=0; ii<3; ii++)
4223 {
4224 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
4225 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
4226 {
4227 bReplaceDst = GL_TRUE;
4228 break;
4229 }
4230 }
4231 }
4232 if(0xF != pILInst->DstReg.WriteMask)
4233 { /* OP3 has no support for write mask */
4234 bReplaceDst = GL_TRUE;
4235 }
4236
4237 if(GL_TRUE == bReplaceDst)
4238 {
4239 tmp = gethelpr(pAsm);
4240
4241 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4242 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4243 pAsm->D.dst.reg = tmp;
4244
4245 nomask_PVSDST(&(pAsm->D.dst));
4246 }
4247 else
4248 {
4249 if( GL_FALSE == assemble_dst(pAsm) )
4250 {
4251 return GL_FALSE;
4252 }
4253 }
4254
4255 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4256 {
4257 return GL_FALSE;
4258 }
4259
4260 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4261 {
4262 return GL_FALSE;
4263 }
4264
4265 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
4266 {
4267 return GL_FALSE;
4268 }
4269
4270 if ( GL_FALSE == next_ins(pAsm) )
4271 {
4272 return GL_FALSE;
4273 }
4274
4275 if (GL_TRUE == bReplaceDst)
4276 {
4277 if( GL_FALSE == assemble_dst(pAsm) )
4278 {
4279 return GL_FALSE;
4280 }
4281
4282 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4283
4284 //tmp for source
4285 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4286 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4287 pAsm->S[0].src.reg = tmp;
4288
4289 noneg_PVSSRC(&(pAsm->S[0].src));
4290 noswizzle_PVSSRC(&(pAsm->S[0].src));
4291
4292 if( GL_FALSE == next_ins(pAsm) )
4293 {
4294 return GL_FALSE;
4295 }
4296 }
4297
4298 return GL_TRUE;
4299 }
4300
4301 /* LIT dst, src */
4302 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
4303 {
4304 unsigned int dstReg;
4305 unsigned int dstType;
4306 checkop1(pAsm);
4307 int tmp = gethelpr(pAsm);
4308
4309 if( GL_FALSE == assemble_dst(pAsm) )
4310 {
4311 return GL_FALSE;
4312 }
4313 dstReg = pAsm->D.dst.reg;
4314 dstType = pAsm->D.dst.rtype;
4315
4316 /* dst.xw, <- 1.0 */
4317 if( pAsm->D.dst.writex || pAsm->D.dst.writew )
4318 {
4319 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4320 {
4321 return GL_FALSE;
4322 }
4323
4324 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4325 pAsm->D.dst.writey = 0;
4326 pAsm->D.dst.writez = 0;
4327 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4328 pAsm->S[0].src.reg = tmp;
4329 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4330 noneg_PVSSRC(&(pAsm->S[0].src));
4331 pAsm->S[0].src.swizzlex = SQ_SEL_1;
4332 pAsm->S[0].src.swizzley = SQ_SEL_1;
4333 pAsm->S[0].src.swizzlez = SQ_SEL_1;
4334 pAsm->S[0].src.swizzlew = SQ_SEL_1;
4335 if( GL_FALSE == next_ins(pAsm) )
4336 {
4337 return GL_FALSE;
4338 }
4339 }
4340
4341 if( GL_FALSE == assemble_dst(pAsm) )
4342 {
4343 return GL_FALSE;
4344 }
4345
4346 if( pAsm->D.dst.writey ) {
4347
4348 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4349 {
4350 return GL_FALSE;
4351 }
4352
4353 /* dst.y = max(src.x, 0.0) */
4354 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4355 pAsm->D.dst.writex = 0;
4356 pAsm->D.dst.writey = 1;
4357 pAsm->D.dst.writez = 0;
4358 pAsm->D.dst.writew = 0;
4359 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4360 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4361 pAsm->S[1].src.reg = tmp;
4362 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4363 noneg_PVSSRC(&(pAsm->S[1].src));
4364 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4365 pAsm->S[1].src.swizzley = SQ_SEL_0;
4366 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4367 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4368 if( GL_FALSE == next_ins(pAsm) )
4369 {
4370 return GL_FALSE;
4371 }
4372 }
4373
4374 if( GL_FALSE == assemble_dst(pAsm) )
4375 {
4376 return GL_FALSE;
4377 }
4378 if ( pAsm->D.dst.writez) {
4379
4380 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4381 {
4382 return GL_FALSE;
4383 }
4384
4385 /* dst.z = log(src.y) */
4386 if(8 == pAsm->unAsic)
4387 {
4388 pAsm->D.dst.opcode = EG_OP2_INST_LOG_CLAMPED;
4389 }
4390 else
4391 {
4392 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
4393 }
4394 pAsm->D.dst.math = 1;
4395 pAsm->D.dst.writex = 0;
4396 pAsm->D.dst.writey = 0;
4397 pAsm->D.dst.writez = 1;
4398 pAsm->D.dst.writew = 0;
4399 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4400 if( GL_FALSE == next_ins(pAsm) )
4401 {
4402 return GL_FALSE;
4403 }
4404
4405 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4406 {
4407 return GL_FALSE;
4408 }
4409
4410 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4411 {
4412 return GL_FALSE;
4413 }
4414
4415 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4416
4417 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4418
4419 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4420 if(8 == pAsm->unAsic)
4421 {
4422 pAsm->D.dst.opcode = EG_OP3_INST_MUL_LIT;
4423 }
4424 else
4425 {
4426 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4427 }
4428 pAsm->D.dst.math = 1;
4429 pAsm->D.dst.op3 = 1;
4430 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4431 pAsm->D.dst.reg = tmp;
4432 pAsm->D.dst.writex = 1;
4433 pAsm->D.dst.writey = 0;
4434 pAsm->D.dst.writez = 0;
4435 pAsm->D.dst.writew = 0;
4436
4437
4438 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4439 pAsm->S[1].src.reg = dstReg;
4440 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4441 noneg_PVSSRC(&(pAsm->S[1].src));
4442 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4443 pAsm->S[1].src.swizzley = SQ_SEL_Z;
4444 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4445 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4446
4447 if( GL_FALSE == next_ins(pAsm) )
4448 {
4449 return GL_FALSE;
4450 }
4451
4452 /* dst.z = exp(tmp.x) */
4453 if( GL_FALSE == assemble_dst(pAsm) )
4454 {
4455 return GL_FALSE;
4456 }
4457 if(8 == pAsm->unAsic)
4458 {
4459 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4460 }
4461 else
4462 {
4463 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4464 }
4465 pAsm->D.dst.math = 1;
4466 pAsm->D.dst.writex = 0;
4467 pAsm->D.dst.writey = 0;
4468 pAsm->D.dst.writez = 1;
4469 pAsm->D.dst.writew = 0;
4470
4471 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4472 pAsm->S[0].src.reg = tmp;
4473 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4474 noneg_PVSSRC(&(pAsm->S[0].src));
4475 pAsm->S[0].src.swizzlex = SQ_SEL_X;
4476 pAsm->S[0].src.swizzley = SQ_SEL_X;
4477 pAsm->S[0].src.swizzlez = SQ_SEL_X;
4478 pAsm->S[0].src.swizzlew = SQ_SEL_X;
4479
4480 if( GL_FALSE == next_ins(pAsm) )
4481 {
4482 return GL_FALSE;
4483 }
4484 }
4485 return GL_TRUE;
4486 }
4487
4488 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
4489 {
4490 if( GL_FALSE == checkop2(pAsm) )
4491 {
4492 return GL_FALSE;
4493 }
4494
4495 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4496
4497 if( GL_FALSE == assemble_dst(pAsm) )
4498 {
4499 return GL_FALSE;
4500 }
4501
4502 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4503 {
4504 return GL_FALSE;
4505 }
4506
4507 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4508 {
4509 return GL_FALSE;
4510 }
4511
4512 if( GL_FALSE == next_ins(pAsm) )
4513 {
4514 return GL_FALSE;
4515 }
4516
4517 return GL_TRUE;
4518 }
4519
4520 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
4521 {
4522 if( GL_FALSE == checkop2(pAsm) )
4523 {
4524 return GL_FALSE;
4525 }
4526
4527 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
4528
4529 if( GL_FALSE == assemble_dst(pAsm) )
4530 {
4531 return GL_FALSE;
4532 }
4533
4534 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4535 {
4536 return GL_FALSE;
4537 }
4538
4539 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4540 {
4541 return GL_FALSE;
4542 }
4543
4544 if( GL_FALSE == next_ins(pAsm) )
4545 {
4546 return GL_FALSE;
4547 }
4548
4549 return GL_TRUE;
4550 }
4551
4552 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
4553 {
4554 checkop1(pAsm);
4555
4556 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4557
4558 if (GL_FALSE == assemble_dst(pAsm))
4559 {
4560 return GL_FALSE;
4561 }
4562
4563 if (GL_FALSE == assemble_src(pAsm, 0, -1))
4564 {
4565 return GL_FALSE;
4566 }
4567
4568 if ( GL_FALSE == next_ins(pAsm) )
4569 {
4570 return GL_FALSE;
4571 }
4572
4573 return GL_TRUE;
4574 }
4575
4576 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
4577 {
4578 if( GL_FALSE == checkop2(pAsm) )
4579 {
4580 return GL_FALSE;
4581 }
4582
4583 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4584
4585 if( GL_FALSE == assemble_dst(pAsm) )
4586 {
4587 return GL_FALSE;
4588 }
4589
4590 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4591 {
4592 return GL_FALSE;
4593 }
4594
4595 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4596 {
4597 return GL_FALSE;
4598 }
4599
4600 if( GL_FALSE == next_ins(pAsm) )
4601 {
4602 return GL_FALSE;
4603 }
4604
4605 return GL_TRUE;
4606 }
4607
4608 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
4609 {
4610 BITS tmp;
4611
4612 checkop1(pAsm);
4613
4614 tmp = gethelpr(pAsm);
4615
4616 // LG2 tmp.x, a.swizzle
4617 if(8 == pAsm->unAsic)
4618 {
4619 pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4620 }
4621 else
4622 {
4623 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4624 }
4625 pAsm->D.dst.math = 1;
4626
4627 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4628 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4629 pAsm->D.dst.reg = tmp;
4630 nomask_PVSDST(&(pAsm->D.dst));
4631
4632 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4633 {
4634 return GL_FALSE;
4635 }
4636
4637 if( GL_FALSE == next_ins(pAsm) )
4638 {
4639 return GL_FALSE;
4640 }
4641
4642 // MUL tmp.x, tmp.x, b.swizzle
4643 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4644
4645 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4646 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4647 pAsm->D.dst.reg = tmp;
4648 nomask_PVSDST(&(pAsm->D.dst));
4649
4650 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4651 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4652 pAsm->S[0].src.reg = tmp;
4653 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4654 noneg_PVSSRC(&(pAsm->S[0].src));
4655
4656 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4657 {
4658 return GL_FALSE;
4659 }
4660
4661 if( GL_FALSE == next_ins(pAsm) )
4662 {
4663 return GL_FALSE;
4664 }
4665
4666 // EX2 dst.mask, tmp.x
4667 // EX2 tmp.x, tmp.x
4668 if(8 == pAsm->unAsic)
4669 {
4670 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4671 }
4672 else
4673 {
4674 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4675 }
4676 pAsm->D.dst.math = 1;
4677
4678 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4679 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4680 pAsm->D.dst.reg = tmp;
4681 nomask_PVSDST(&(pAsm->D.dst));
4682
4683 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4684 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4685 pAsm->S[0].src.reg = tmp;
4686 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4687 noneg_PVSSRC(&(pAsm->S[0].src));
4688
4689 if( GL_FALSE == next_ins(pAsm) )
4690 {
4691 return GL_FALSE;
4692 }
4693
4694 // Now replicate result to all necessary channels in destination
4695 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4696
4697 if( GL_FALSE == assemble_dst(pAsm) )
4698 {
4699 return GL_FALSE;
4700 }
4701
4702 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4703 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4704 pAsm->S[0].src.reg = tmp;
4705
4706 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4707 noneg_PVSSRC(&(pAsm->S[0].src));
4708
4709 if( GL_FALSE == next_ins(pAsm) )
4710 {
4711 return GL_FALSE;
4712 }
4713
4714 return GL_TRUE;
4715 }
4716
4717 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4718 {
4719 if(8 == pAsm->unAsic)
4720 {
4721 return assemble_math_function(pAsm, EG_OP2_INST_RECIP_IEEE);
4722 }
4723
4724 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4725 }
4726
4727 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4728 {
4729 if(8 == pAsm->unAsic)
4730 {
4731 return assemble_math_function(pAsm, EG_OP2_INST_RECIPSQRT_IEEE);
4732 }
4733
4734 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4735 }
4736
4737 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4738 {
4739 BITS tmp;
4740
4741 checkop1(pAsm);
4742
4743 tmp = gethelpr(pAsm);
4744
4745 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4746 pAsm->D.dst.op3 = 1;
4747
4748 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4749 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4750 pAsm->D.dst.reg = tmp;
4751
4752 assemble_src(pAsm, 0, -1);
4753
4754 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4755 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4756
4757 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4758 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4759
4760 pAsm->D2.dst2.literal_slots = 1;
4761 pAsm->C[0].f = 1/(3.1415926535 * 2);
4762 pAsm->C[1].f = 0.5F;
4763
4764 if ( GL_FALSE == next_ins(pAsm) )
4765 {
4766 return GL_FALSE;
4767 }
4768
4769 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
4770
4771 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4772 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4773 pAsm->D.dst.reg = tmp;
4774 pAsm->D.dst.writex = 1;
4775
4776 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4777 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4778 pAsm->S[0].src.reg = tmp;
4779 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4780
4781 if(( GL_FALSE == next_ins(pAsm) ))
4782 {
4783 return GL_FALSE;
4784 }
4785 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4786 pAsm->D.dst.op3 = 1;
4787
4788 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4789 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4790 pAsm->D.dst.reg = tmp;
4791
4792 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4793 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4794 pAsm->S[0].src.reg = tmp;
4795 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4796
4797 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4798 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4799
4800 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4801 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4802
4803 pAsm->D2.dst2.literal_slots = 1;
4804
4805 if(pAsm->bR6xx) {
4806 pAsm->C[0].f = 3.1415926535897f * 2.0f;
4807 pAsm->C[1].f = -3.1415926535897f;
4808 } else {
4809 pAsm->C[0].f = 1.0f;
4810 pAsm->C[1].f = -0.5f;
4811 }
4812
4813 if(( GL_FALSE == next_ins(pAsm) ))
4814 {
4815 return GL_FALSE;
4816 }
4817
4818 // COS dst.x, a.x
4819 if(8 == pAsm->unAsic)
4820 {
4821 pAsm->D.dst.opcode = EG_OP2_INST_COS;
4822 }
4823 else
4824 {
4825 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4826 }
4827 pAsm->D.dst.math = 1;
4828
4829 assemble_dst(pAsm);
4830 /* mask y */
4831 pAsm->D.dst.writey = 0;
4832
4833 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4834 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4835 pAsm->S[0].src.reg = tmp;
4836 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4837 noneg_PVSSRC(&(pAsm->S[0].src));
4838
4839 if ( GL_FALSE == next_ins(pAsm) )
4840 {
4841 return GL_FALSE;
4842 }
4843
4844 // SIN dst.y, a.x
4845 if(8 == pAsm->unAsic)
4846 {
4847 pAsm->D.dst.opcode = EG_OP2_INST_SIN;
4848 }
4849 else
4850 {
4851 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4852 }
4853 pAsm->D.dst.math = 1;
4854
4855 assemble_dst(pAsm);
4856 /* mask x */
4857 pAsm->D.dst.writex = 0;
4858
4859 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4860 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4861 pAsm->S[0].src.reg = tmp;
4862 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4863 noneg_PVSSRC(&(pAsm->S[0].src));
4864
4865 if( GL_FALSE == next_ins(pAsm) )
4866 {
4867 return GL_FALSE;
4868 }
4869
4870 return GL_TRUE;
4871 }
4872
4873 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4874 {
4875 if( GL_FALSE == checkop2(pAsm) )
4876 {
4877 return GL_FALSE;
4878 }
4879
4880 pAsm->D.dst.opcode = opcode;
4881 //pAsm->D.dst.math = 1;
4882
4883 if( GL_FALSE == assemble_dst(pAsm) )
4884 {
4885 return GL_FALSE;
4886 }
4887
4888 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4889 {
4890 return GL_FALSE;
4891 }
4892
4893 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4894 {
4895 return GL_FALSE;
4896 }
4897
4898 if( GL_FALSE == next_ins(pAsm) )
4899 {
4900 return GL_FALSE;
4901 }
4902
4903 return GL_TRUE;
4904 }
4905
4906 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4907 {
4908 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4909
4910 pAsm->D.dst.opcode = opcode;
4911 pAsm->D.dst.math = 1;
4912 pAsm->D.dst.predicated = 1;
4913
4914 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4915 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4916 pAsm->D.dst.reg = pAsm->uHelpReg;
4917 pAsm->D.dst.writex = 1;
4918 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4919
4920 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4921 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4922 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
4923 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
4924 noneg_PVSSRC(&(pAsm->S[0].src));
4925
4926 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4927 pAsm->S[1].src.reg = pAsm->uHelpReg;
4928 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4929 noneg_PVSSRC(&(pAsm->S[1].src));
4930 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4931 pAsm->S[1].src.swizzley = SQ_SEL_0;
4932 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4933 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4934
4935 if( GL_FALSE == next_ins(pAsm) )
4936 {
4937 return GL_FALSE;
4938 }
4939
4940 return GL_TRUE;
4941 }
4942
4943 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4944 {
4945 if( GL_FALSE == checkop2(pAsm) )
4946 {
4947 return GL_FALSE;
4948 }
4949
4950 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4951
4952 if( GL_FALSE == assemble_dst(pAsm) )
4953 {
4954 return GL_FALSE;
4955 }
4956
4957 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4958 {
4959 return GL_FALSE;
4960 }
4961
4962 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4963 {
4964 return GL_FALSE;
4965 }
4966
4967 if( GL_FALSE == next_ins(pAsm) )
4968 {
4969 return GL_FALSE;
4970 }
4971
4972 return GL_TRUE;
4973 }
4974
4975 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4976 {
4977 if( GL_FALSE == checkop2(pAsm) )
4978 {
4979 return GL_FALSE;
4980 }
4981
4982 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4983
4984 if( GL_FALSE == assemble_dst(pAsm) )
4985 {
4986 return GL_FALSE;
4987 }
4988
4989 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4990 {
4991 return GL_FALSE;
4992 }
4993
4994 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4995 {
4996 return GL_FALSE;
4997 }
4998
4999 if( GL_FALSE == next_ins(pAsm) )
5000 {
5001 return GL_FALSE;
5002 }
5003
5004 return GL_TRUE;
5005 }
5006
5007 GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
5008 {
5009 checkop1(pAsm);
5010
5011 GLuint tmp = gethelpr(pAsm);
5012 /* tmp = (src > 0 ? 1 : src) */
5013 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5014 pAsm->D.dst.op3 = 1;
5015 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5016 pAsm->D.dst.reg = tmp;
5017
5018 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5019 {
5020 return GL_FALSE;
5021 }
5022
5023 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5024
5025 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
5026 {
5027 return GL_FALSE;
5028 }
5029
5030 if( GL_FALSE == next_ins(pAsm) )
5031 {
5032 return GL_FALSE;
5033 }
5034
5035 /* dst = (-tmp > 0 ? -1 : tmp) */
5036 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5037 pAsm->D.dst.op3 = 1;
5038
5039 if( GL_FALSE == assemble_dst(pAsm) )
5040 {
5041 return GL_FALSE;
5042 }
5043
5044 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5045 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5046 pAsm->S[0].src.reg = tmp;
5047 noswizzle_PVSSRC(&(pAsm->S[0].src));
5048 neg_PVSSRC(&(pAsm->S[0].src));
5049
5050 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5051 neg_PVSSRC(&(pAsm->S[1].src));
5052
5053 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5054 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5055 pAsm->S[2].src.reg = tmp;
5056 noswizzle_PVSSRC(&(pAsm->S[2].src));
5057
5058 if( GL_FALSE == next_ins(pAsm) )
5059 {
5060 return GL_FALSE;
5061 }
5062
5063 return GL_TRUE;
5064 }
5065
5066 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
5067 {
5068 return GL_TRUE;
5069 }
5070
5071 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
5072 {
5073 GLboolean src_const;
5074 GLboolean need_barrier = GL_FALSE;
5075
5076 checkop1(pAsm);
5077
5078 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
5079 {
5080 case PROGRAM_UNIFORM:
5081 case PROGRAM_CONSTANT:
5082 case PROGRAM_LOCAL_PARAM:
5083 case PROGRAM_ENV_PARAM:
5084 case PROGRAM_STATE_VAR:
5085 src_const = GL_TRUE;
5086 break;
5087 case PROGRAM_TEMPORARY:
5088 case PROGRAM_INPUT:
5089 default:
5090 src_const = GL_FALSE;
5091 break;
5092 }
5093
5094 if (GL_TRUE == src_const)
5095 {
5096 if ( GL_FALSE == mov_temp(pAsm, 0) )
5097 return GL_FALSE;
5098 need_barrier = GL_TRUE;
5099 }
5100
5101 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5102 {
5103 GLuint tmp = gethelpr(pAsm);
5104 if(8 == pAsm->unAsic)
5105 {
5106 pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5107 }
5108 else
5109 {
5110 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5111 }
5112 pAsm->D.dst.math = 1;
5113 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5114 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5115 pAsm->D.dst.reg = tmp;
5116 pAsm->D.dst.writew = 1;
5117
5118 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5119 {
5120 return GL_FALSE;
5121 }
5122 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
5123 if( GL_FALSE == next_ins(pAsm) )
5124 {
5125 return GL_FALSE;
5126 }
5127
5128 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5129 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5130 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5131 pAsm->D.dst.reg = tmp;
5132 pAsm->D.dst.writex = 1;
5133 pAsm->D.dst.writey = 1;
5134 pAsm->D.dst.writez = 1;
5135 pAsm->D.dst.writew = 0;
5136
5137 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5138 {
5139 return GL_FALSE;
5140 }
5141 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5142 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5143 pAsm->S[1].src.reg = tmp;
5144 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
5145
5146 if( GL_FALSE == next_ins(pAsm) )
5147 {
5148 return GL_FALSE;
5149 }
5150
5151 pAsm->aArgSubst[1] = tmp;
5152 need_barrier = GL_TRUE;
5153 }
5154
5155 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
5156 {
5157 GLuint tmp1 = gethelpr(pAsm);
5158 GLuint tmp2 = gethelpr(pAsm);
5159
5160 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5161 if(8 == pAsm->unAsic)
5162 {
5163 pAsm->D.dst.opcode = EG_OP2_INST_CUBE;
5164 }
5165 else
5166 {
5167 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
5168 }
5169 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5170 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5171 pAsm->D.dst.reg = tmp1;
5172 nomask_PVSDST(&(pAsm->D.dst));
5173
5174 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5175 {
5176 return GL_FALSE;
5177 }
5178
5179 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
5180 {
5181 return GL_FALSE;
5182 }
5183
5184 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
5185 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
5186
5187 if( GL_FALSE == next_ins(pAsm) )
5188 {
5189 return GL_FALSE;
5190 }
5191
5192 /* tmp1.z = RCP_e(|tmp1.z|) */
5193 if(8 == pAsm->unAsic)
5194 {
5195 pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5196 }
5197 else
5198 {
5199 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5200 }
5201 pAsm->D.dst.math = 1;
5202 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5203 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5204 pAsm->D.dst.reg = tmp1;
5205 pAsm->D.dst.writez = 1;
5206
5207 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5208 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5209 pAsm->S[0].src.reg = tmp1;
5210 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
5211 pAsm->S[0].src.abs = 1;
5212
5213 next_ins(pAsm);
5214
5215 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
5216 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
5217 * muladd has no writemask, have to use another temp
5218 */
5219 if(8 == pAsm->unAsic)
5220 {
5221 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5222 }
5223 else
5224 {
5225 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5226 }
5227 pAsm->D.dst.op3 = 1;
5228 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5229 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5230 pAsm->D.dst.reg = tmp2;
5231
5232 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5233 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5234 pAsm->S[0].src.reg = tmp1;
5235 noswizzle_PVSSRC(&(pAsm->S[0].src));
5236 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5237 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5238 pAsm->S[1].src.reg = tmp1;
5239 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
5240 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5241 /* immediate c 1.5 */
5242 pAsm->D2.dst2.literal_slots = 1;
5243 pAsm->C[0].f = 1.5F;
5244 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
5245 pAsm->S[2].src.reg = tmp1;
5246 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
5247
5248 next_ins(pAsm);
5249
5250 /* tmp1.xy = temp2.xy */
5251 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5252 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5253 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5254 pAsm->D.dst.reg = tmp1;
5255 pAsm->D.dst.writex = 1;
5256 pAsm->D.dst.writey = 1;
5257 pAsm->D.dst.writez = 0;
5258 pAsm->D.dst.writew = 0;
5259
5260 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5261 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5262 pAsm->S[0].src.reg = tmp2;
5263 noswizzle_PVSSRC(&(pAsm->S[0].src));
5264
5265 next_ins(pAsm);
5266 pAsm->aArgSubst[1] = tmp1;
5267 need_barrier = GL_TRUE;
5268
5269 }
5270
5271 switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
5272 {
5273 case OPCODE_DDX:
5274 /* will these need WQM(1) on CF inst ? */
5275 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
5276 break;
5277 case OPCODE_DDY:
5278 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
5279 break;
5280 case OPCODE_TXB:
5281 /* this should actually be SAMPLE_LB but that needs bias to be
5282 * embedded in the instruction - cant do here */
5283 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5284 break;
5285 case OPCODE_TXL:
5286 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5287 break;
5288 default:
5289 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5290 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
5291 else
5292 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
5293 }
5294
5295 pAsm->is_tex = GL_TRUE;
5296 if ( GL_TRUE == need_barrier )
5297
5298 pAsm->is_tex = GL_TRUE;
5299 if ( GL_TRUE == need_barrier )
5300 {
5301 pAsm->need_tex_barrier = GL_TRUE;
5302 }
5303 // Set src1 to tex unit id
5304 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5305 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5306
5307 //No sw info from mesa compiler, so hard code here.
5308 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5309 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5310 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5311 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5312
5313 if( GL_FALSE == tex_dst(pAsm) )
5314 {
5315 return GL_FALSE;
5316 }
5317
5318 if( GL_FALSE == tex_src(pAsm) )
5319 {
5320 return GL_FALSE;
5321 }
5322
5323 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5324 {
5325 /* hopefully did swizzles before */
5326 noswizzle_PVSSRC(&(pAsm->S[0].src));
5327 }
5328
5329 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
5330 {
5331 /* SAMPLE dst, tmp.yxwy, CUBE */
5332 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
5333 pAsm->S[0].src.swizzley = SQ_SEL_X;
5334 pAsm->S[0].src.swizzlez = SQ_SEL_W;
5335 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
5336 }
5337
5338 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5339 {
5340 /* compare value goes to w chan ? */
5341 pAsm->S[0].src.swizzlew = SQ_SEL_Z;
5342 }
5343
5344 if ( GL_FALSE == next_ins(pAsm) )
5345 {
5346 return GL_FALSE;
5347 }
5348
5349 /* add ARB shadow ambient but clamp to 0..1 */
5350 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5351 {
5352 /* ADD_SAT dst, dst, ambient[texunit] */
5353 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
5354
5355 if( GL_FALSE == assemble_dst(pAsm) )
5356 {
5357 return GL_FALSE;
5358 }
5359 pAsm->D2.dst2.SaturateMode = 1;
5360
5361 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5362 pAsm->S[0].src.reg = pAsm->D.dst.reg;
5363 noswizzle_PVSSRC(&(pAsm->S[0].src));
5364 noneg_PVSSRC(&(pAsm->S[0].src));
5365
5366 pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
5367 pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5368 noswizzle_PVSSRC(&(pAsm->S[1].src));
5369 noneg_PVSSRC(&(pAsm->S[1].src));
5370
5371 if( GL_FALSE == next_ins(pAsm) )
5372 {
5373 return GL_FALSE;
5374 }
5375
5376 }
5377
5378 return GL_TRUE;
5379 }
5380
5381 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
5382 {
5383 BITS tmp1;
5384 BITS tmp2 = 0;
5385
5386 if( GL_FALSE == checkop2(pAsm) )
5387 {
5388 return GL_FALSE;
5389 }
5390
5391 tmp1 = gethelpr(pAsm);
5392
5393 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5394
5395 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5396 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5397 pAsm->D.dst.reg = tmp1;
5398 nomask_PVSDST(&(pAsm->D.dst));
5399
5400 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5401 {
5402 return GL_FALSE;
5403 }
5404
5405 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5406 {
5407 return GL_FALSE;
5408 }
5409
5410 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5411 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5412
5413 if( GL_FALSE == next_ins(pAsm) )
5414 {
5415 return GL_FALSE;
5416 }
5417
5418 if(8 == pAsm->unAsic)
5419 {
5420 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5421 }
5422 else
5423 {
5424 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5425 }
5426 pAsm->D.dst.op3 = 1;
5427
5428 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5429 {
5430 tmp2 = gethelpr(pAsm);
5431
5432 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5433 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5434 pAsm->D.dst.reg = tmp2;
5435
5436 nomask_PVSDST(&(pAsm->D.dst));
5437 }
5438 else
5439 {
5440 if( GL_FALSE == assemble_dst(pAsm) )
5441 {
5442 return GL_FALSE;
5443 }
5444 }
5445
5446 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5447 {
5448 return GL_FALSE;
5449 }
5450
5451 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5452 {
5453 return GL_FALSE;
5454 }
5455
5456 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5457 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5458
5459 // result1 + (neg) result0
5460 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
5461 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5462 pAsm->S[2].src.reg = tmp1;
5463
5464 neg_PVSSRC(&(pAsm->S[2].src));
5465 noswizzle_PVSSRC(&(pAsm->S[2].src));
5466
5467 if( GL_FALSE == next_ins(pAsm) )
5468 {
5469 return GL_FALSE;
5470 }
5471
5472
5473 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5474 {
5475 if( GL_FALSE == assemble_dst(pAsm) )
5476 {
5477 return GL_FALSE;
5478 }
5479
5480 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5481
5482 // Use tmp as source
5483 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5484 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5485 pAsm->S[0].src.reg = tmp2;
5486
5487 noneg_PVSSRC(&(pAsm->S[0].src));
5488 noswizzle_PVSSRC(&(pAsm->S[0].src));
5489
5490 if( GL_FALSE == next_ins(pAsm) )
5491 {
5492 return GL_FALSE;
5493 }
5494 }
5495
5496 return GL_TRUE;
5497 }
5498
5499 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
5500 {
5501 return GL_TRUE;
5502 }
5503
5504 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
5505 {
5506 switch (uReason)
5507 {
5508 case FC_PUSH_VPM:
5509 pAsm->CALLSTACK[pAsm->CALLSP].current--;
5510 break;
5511 case FC_PUSH_WQM:
5512 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5513 break;
5514 case FC_LOOP:
5515 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5516 break;
5517 case FC_REP:
5518 /* TODO : for 16 vp asic, should -= 2; */
5519 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
5520 break;
5521 };
5522 }
5523
5524 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
5525 {
5526 if(GL_TRUE == bCheckMaxOnly)
5527 {
5528 switch (uReason)
5529 {
5530 case FC_PUSH_VPM:
5531 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
5532 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5533 {
5534 pAsm->CALLSTACK[pAsm->CALLSP].max =
5535 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
5536 }
5537 break;
5538 case FC_PUSH_WQM:
5539 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
5540 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5541 {
5542 pAsm->CALLSTACK[pAsm->CALLSP].max =
5543 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
5544 }
5545 break;
5546 }
5547 return;
5548 }
5549
5550 switch (uReason)
5551 {
5552 case FC_PUSH_VPM:
5553 pAsm->CALLSTACK[pAsm->CALLSP].current++;
5554 break;
5555 case FC_PUSH_WQM:
5556 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5557 break;
5558 case FC_LOOP:
5559 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5560 break;
5561 case FC_REP:
5562 /* TODO : for 16 vp asic, should += 2; */
5563 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
5564 break;
5565 };
5566
5567 if(pAsm->CALLSTACK[pAsm->CALLSP].current
5568 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5569 {
5570 pAsm->CALLSTACK[pAsm->CALLSP].max =
5571 pAsm->CALLSTACK[pAsm->CALLSP].current;
5572 }
5573 }
5574
5575 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
5576 {
5577 if(GL_FALSE == add_cf_instruction(pAsm) )
5578 {
5579 return GL_FALSE;
5580 }
5581
5582 if(8 == pAsm->unAsic)
5583 {
5584 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5585 EG_CF_INST_JUMP,
5586 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5587 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5588 pops,
5589 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5590 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5591 0,
5592 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5593 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5594 SQ_CF_COND_ACTIVE,
5595 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5596 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5597 0,
5598 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5599 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5600 0,
5601 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5602 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5603 0,
5604 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5605 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5606 0,
5607 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5608 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5609 1,
5610 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5611 }
5612 else
5613 {
5614 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5615 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5616 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5617
5618 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5619 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5620 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5621 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5622
5623 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5624 }
5625
5626 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
5627
5628 return GL_TRUE;
5629 }
5630
5631 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5632 {
5633 if(GL_FALSE == add_cf_instruction(pAsm) )
5634 {
5635 return GL_FALSE;
5636 }
5637
5638 if(8 == pAsm->unAsic)
5639 {
5640 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5641 EG_CF_INST_POP,
5642 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5643 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5644 pops,
5645 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5646 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5647 0,
5648 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5649 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5650 SQ_CF_COND_ACTIVE,
5651 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5652 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5653 0,
5654 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5655 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5656 0,
5657 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5658 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5659 0,
5660 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5661 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5662 1,
5663 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5664 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5665 0,
5666 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5667 }
5668 else
5669 {
5670 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5671 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5672 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5673
5674 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5675 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5676 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5677
5678 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5679
5680 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5681 }
5682 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5683
5684 return GL_TRUE;
5685 }
5686
5687 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
5688 {
5689 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5690
5691 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5692
5693
5694 if(GL_FALSE == add_cf_instruction(pAsm) )
5695 {
5696 return GL_FALSE;
5697 }
5698
5699 if(8 == pAsm->unAsic)
5700 {
5701 if(GL_TRUE != bHasElse)
5702 {
5703 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5704 1,
5705 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5706 }
5707 else
5708 {
5709 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5710 0,
5711 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5712 }
5713
5714 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5715 EG_CF_INST_JUMP,
5716 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5717 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5718 0,
5719 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5720 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5721 SQ_CF_COND_ACTIVE,
5722 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5723 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5724 0,
5725 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5726 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5727 0,
5728 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5729 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5730 0,
5731 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5732 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5733 1,
5734 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5735 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5736 0,
5737 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5738 }
5739 else
5740 {
5741 if(GL_TRUE != bHasElse)
5742 {
5743 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5744 }
5745 else
5746 {
5747 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5748 }
5749 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5750 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5751
5752 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5753 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5754 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5755 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5756
5757 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5758 }
5759
5760 pAsm->FCSP++;
5761 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
5762 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5763 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5764 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5765
5766 #ifndef USE_CF_FOR_POP_AFTER
5767 if(GL_TRUE != bHasElse)
5768 {
5769 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5770 }
5771 #endif /* USE_CF_FOR_POP_AFTER */
5772
5773 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
5774
5775 return GL_TRUE;
5776 }
5777
5778 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5779 {
5780 if(GL_FALSE == add_cf_instruction(pAsm) )
5781 {
5782 return GL_FALSE;
5783 }
5784
5785 if(8 == pAsm->unAsic)
5786 {
5787 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5788 1,
5789 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5790 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5791 EG_CF_INST_ELSE,
5792 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5793 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5794 0,
5795 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5796 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5797 SQ_CF_COND_ACTIVE,
5798 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5799 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5800 0,
5801 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5802 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5803 0,
5804 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5805 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5806 0,
5807 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5808 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5809 1,
5810 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5811 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5812 0,
5813 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5814 }
5815 else
5816 {
5817 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
5818 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5819 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5820
5821 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5822 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5823 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
5824 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5825
5826 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5827 }
5828
5829 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5830 0,
5831 sizeof(R700ControlFlowGenericClause *) );
5832 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5833 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5834
5835 #ifndef USE_CF_FOR_POP_AFTER
5836 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5837 #endif /* USE_CF_FOR_POP_AFTER */
5838
5839 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
5840
5841 return GL_TRUE;
5842 }
5843
5844 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5845 {
5846 #ifdef USE_CF_FOR_POP_AFTER
5847 pops(pAsm, 1);
5848 #endif /* USE_CF_FOR_POP_AFTER */
5849
5850 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5851
5852 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5853 {
5854 /* no else in between */
5855 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5856 }
5857 else
5858 {
5859 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5860 }
5861
5862 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5863 {
5864 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5865 }
5866
5867 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5868 {
5869 radeon_error("if/endif in shader code are not paired. \n");
5870 return GL_FALSE;
5871 }
5872
5873 pAsm->FCSP--;
5874
5875 decreaseCurrent(pAsm, FC_PUSH_VPM);
5876
5877 return GL_TRUE;
5878 }
5879
5880 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5881 {
5882 if(GL_FALSE == add_cf_instruction(pAsm) )
5883 {
5884 return GL_FALSE;
5885 }
5886
5887 if(8 == pAsm->unAsic)
5888 {
5889 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5890 0,
5891 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5892 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5893 EG_CF_INST_LOOP_START_NO_AL,
5894 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5895 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5896 0,
5897 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5898 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5899 SQ_CF_COND_ACTIVE,
5900 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5901 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5902 0,
5903 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5904 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5905 0,
5906 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5907 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5908 0,
5909 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5910 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5911 1,
5912 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5913 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5914 0,
5915 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5916 }
5917 else
5918 {
5919 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5920 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5921 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5922
5923 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5924 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5925 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
5926 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5927
5928 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5929 }
5930
5931 pAsm->FCSP++;
5932 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
5933 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5934 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
5935 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
5936 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5937
5938 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
5939
5940 return GL_TRUE;
5941 }
5942
5943 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
5944 {
5945 #ifdef USE_CF_FOR_CONTINUE_BREAK
5946
5947 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5948
5949 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5950
5951 unsigned int unFCSP;
5952 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5953 {
5954 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5955 {
5956 break;
5957 }
5958 }
5959 if(0 == FC_LOOP)
5960 {
5961 radeon_error("Break is not inside loop/endloop pair.\n");
5962 return GL_FALSE;
5963 }
5964
5965 if(GL_FALSE == add_cf_instruction(pAsm) )
5966 {
5967 return GL_FALSE;
5968 }
5969
5970 if(8 == pAsm->unAsic)
5971 {
5972 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5973 1,
5974 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5975 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5976 EG_CF_INST_LOOP_BREAK,
5977 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5978 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5979 0,
5980 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5981 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5982 SQ_CF_COND_ACTIVE,
5983 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5984 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5985 0,
5986 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5987 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5988 0,
5989 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5990 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5991 0,
5992 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5993 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5994 1,
5995 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5996 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5997 0,
5998 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5999 }
6000 else
6001 {
6002 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6003 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6004 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6005
6006 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6007 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6008 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
6009
6010 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6011
6012 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6013 }
6014
6015 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6016 (void *)pAsm->fc_stack[unFCSP].mid,
6017 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6018 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6019 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6020 pAsm->fc_stack[unFCSP].unNumMid++;
6021
6022 if(GL_FALSE == add_cf_instruction(pAsm) )
6023 {
6024 return GL_FALSE;
6025 }
6026
6027 if(8 == pAsm->unAsic)
6028 {
6029 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6030 1,
6031 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6032 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6033 EG_CF_INST_POP,
6034 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6035 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6036 0,
6037 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6038 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6039 SQ_CF_COND_ACTIVE,
6040 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6041 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6042 0,
6043 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6044 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6045 0,
6046 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6047 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6048 0,
6049 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6050 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6051 1,
6052 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6053 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6054 0,
6055 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6056 }
6057 else
6058 {
6059 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6060 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6061 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6062
6063 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6064 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6065 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
6066
6067 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6068
6069 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6070 }
6071
6072 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6073
6074 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6075
6076 #endif //USE_CF_FOR_CONTINUE_BREAK
6077 return GL_TRUE;
6078 }
6079
6080 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
6081 {
6082 #ifdef USE_CF_FOR_CONTINUE_BREAK
6083 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6084
6085 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
6086
6087 unsigned int unFCSP;
6088 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6089 {
6090 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6091 {
6092 break;
6093 }
6094 }
6095 if(0 == FC_LOOP)
6096 {
6097 radeon_error("Continue is not inside loop/endloop pair.\n");
6098 return GL_FALSE;
6099 }
6100
6101 if(GL_FALSE == add_cf_instruction(pAsm) )
6102 {
6103 return GL_FALSE;
6104 }
6105
6106 if(8 == pAsm->unAsic)
6107 {
6108 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6109 1,
6110 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6111 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6112 EG_CF_INST_LOOP_CONTINUE,
6113 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6114 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6115 0,
6116 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6117 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6118 SQ_CF_COND_ACTIVE,
6119 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6120 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6121 0,
6122 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6123 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6124 0,
6125 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6126 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6127 0,
6128 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6129 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6130 1,
6131 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6132 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6133 0,
6134 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6135 }
6136 else
6137 {
6138 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6139 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6140 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6141
6142 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6143 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6144 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
6145
6146 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6147
6148 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6149 }
6150
6151 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6152 (void *)pAsm->fc_stack[unFCSP].mid,
6153 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6154 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6155 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6156 pAsm->fc_stack[unFCSP].unNumMid++;
6157
6158 if(GL_FALSE == add_cf_instruction(pAsm) )
6159 {
6160 return GL_FALSE;
6161 }
6162
6163 if(8 == pAsm->unAsic)
6164 {
6165 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6166 1,
6167 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6168 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6169 EG_CF_INST_POP,
6170 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6171 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6172 0,
6173 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6174 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6175 SQ_CF_COND_ACTIVE,
6176 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6177 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6178 0,
6179 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6180 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6181 0,
6182 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6183 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6184 0,
6185 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6186 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6187 1,
6188 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6189 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6190 0,
6191 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6192 }
6193 else
6194 {
6195 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6196 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6197 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6198
6199 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6200 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6201 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
6202
6203 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6204
6205 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6206 }
6207
6208 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6209
6210 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6211
6212 #endif /* USE_CF_FOR_CONTINUE_BREAK */
6213
6214 return GL_TRUE;
6215 }
6216
6217 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
6218 {
6219 GLuint i;
6220
6221 if(GL_FALSE == add_cf_instruction(pAsm) )
6222 {
6223 return GL_FALSE;
6224 }
6225
6226 if(8 == pAsm->unAsic)
6227 {
6228 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6229 0,
6230 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6231 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6232 EG_CF_INST_LOOP_END,
6233 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6234 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6235 0,
6236 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6237 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6238 SQ_CF_COND_ACTIVE,
6239 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6240 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6241 0,
6242 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6243 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6244 0,
6245 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6246 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6247 0,
6248 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6249 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6250 1,
6251 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6252 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6253 0,
6254 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6255 }
6256 else
6257 {
6258 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6259 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6260 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6261
6262 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6263 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6264 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
6265 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6266
6267 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6268 }
6269
6270 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
6271 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6272
6273 #ifdef USE_CF_FOR_CONTINUE_BREAK
6274 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
6275 {
6276 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
6277 }
6278 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
6279 {
6280 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
6281 }
6282 #endif
6283
6284 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
6285 {
6286 radeon_error("loop/endloop in shader code are not paired. \n");
6287 return GL_FALSE;
6288 }
6289
6290 GLuint unFCSP;
6291 GLuint unIF = 0;
6292 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
6293 {
6294 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6295 {
6296 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6297 {
6298 breakLoopOnFlag(pAsm, unFCSP);
6299 break;
6300 }
6301 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6302 {
6303 unIF++;
6304 }
6305 }
6306 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
6307 {
6308 #ifdef USE_CF_FOR_POP_AFTER
6309 returnOnFlag(pAsm, unIF);
6310 #else
6311 returnOnFlag(pAsm, 0);
6312 #endif /* USE_CF_FOR_POP_AFTER */
6313 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
6314 }
6315 }
6316
6317 pAsm->FCSP--;
6318
6319 decreaseCurrent(pAsm, FC_LOOP);
6320
6321 return GL_TRUE;
6322 }
6323
6324 void add_return_inst(r700_AssemblerBase *pAsm)
6325 {
6326 if(GL_FALSE == add_cf_instruction(pAsm) )
6327 {
6328 return;
6329 }
6330
6331 if(8 == pAsm->unAsic)
6332 {
6333 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6334 0,
6335 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6336 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6337 EG_CF_INST_RETURN,
6338 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6339 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6340 0,
6341 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6342 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6343 SQ_CF_COND_ACTIVE,
6344 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6345 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6346 0,
6347 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6348 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6349 0,
6350 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6351 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6352 0,
6353 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6354 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6355 1,
6356 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6357 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6358 0,
6359 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6360 }
6361 else
6362 {
6363 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6364 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6365 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6366 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6367
6368 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6369 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6370 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
6371 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6372
6373 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6374 }
6375 }
6376
6377 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
6378 {
6379 /* Put in sub */
6380 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
6381 {
6382 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
6383 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
6384 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
6385 if(NULL == pAsm->subs)
6386 {
6387 return GL_FALSE;
6388 }
6389 pAsm->unSubArraySize += 10;
6390 }
6391
6392 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
6393 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
6394 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
6395 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
6396
6397 pAsm->CALLSP++;
6398 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
6399 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
6400 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
6401 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
6402 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
6403 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
6404 SetActiveCFlist(pAsm->pR700Shader,
6405 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6406
6407 pAsm->unSubArrayPointer++;
6408
6409 /* start sub */
6410 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6411
6412 pAsm->FCSP++;
6413 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
6414
6415 checkStackDepth(pAsm, FC_REP, GL_FALSE);
6416
6417 return GL_TRUE;
6418 }
6419
6420 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
6421 {
6422 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
6423 {
6424 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
6425 return GL_FALSE;
6426 }
6427
6428 /* copy max to sub structure */
6429 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
6430 = pAsm->CALLSTACK[pAsm->CALLSP].max;
6431
6432 decreaseCurrent(pAsm, FC_REP);
6433
6434 pAsm->CALLSP--;
6435 SetActiveCFlist(pAsm->pR700Shader,
6436 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6437
6438 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6439
6440 pAsm->FCSP--;
6441
6442 return GL_TRUE;
6443 }
6444
6445 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
6446 {
6447 GLuint unIF = 0;
6448
6449 if(pAsm->CALLSP > 0)
6450 { /* in sub */
6451 GLuint unFCSP;
6452 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6453 {
6454 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6455 {
6456 setRetInLoopFlag(pAsm, SQ_SEL_1);
6457 breakLoopOnFlag(pAsm, unFCSP);
6458 pAsm->unCFflags |= LOOPRET_FLAGS;
6459
6460 return GL_TRUE;
6461 }
6462 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6463 {
6464 unIF++;
6465 }
6466 }
6467 }
6468
6469 #ifdef USE_CF_FOR_POP_AFTER
6470 if(unIF > 0)
6471 {
6472 pops(pAsm, unIF);
6473 }
6474 #endif /* USE_CF_FOR_POP_AFTER */
6475
6476 add_return_inst(pAsm);
6477
6478 return GL_TRUE;
6479 }
6480
6481 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
6482 GLint nILindex,
6483 GLuint uiIL_Shift,
6484 GLuint uiNumberInsts,
6485 struct prog_instruction *pILInst,
6486 PRESUB_DESC * pPresubDesc)
6487 {
6488 GLint uiIL_Offset;
6489
6490 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6491
6492 if(GL_FALSE == add_cf_instruction(pAsm) )
6493 {
6494 return GL_FALSE;
6495 }
6496
6497 if(8 == pAsm->unAsic)
6498 {
6499 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6500 0,
6501 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6502 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6503 EG_CF_INST_CALL,
6504 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6505 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6506 0,
6507 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6508 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6509 SQ_CF_COND_ACTIVE,
6510 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6511 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6512 0,
6513 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6514 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6515 0,
6516 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6517 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6518 0,
6519 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6520 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6521 1,
6522 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6523 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6524 1,
6525 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6526 }
6527 else
6528 {
6529 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
6530 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6531 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6532 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6533
6534 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6535 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6536 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
6537 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6538
6539 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6540 }
6541
6542 /* Put in caller */
6543 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
6544 {
6545 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
6546 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
6547 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
6548 if(NULL == pAsm->callers)
6549 {
6550 return GL_FALSE;
6551 }
6552 pAsm->unCallerArraySize += 10;
6553 }
6554
6555 uiIL_Offset = nILindex + uiIL_Shift;
6556 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
6557 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
6558
6559 pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
6560 pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
6561
6562 pAsm->unCallerArrayPointer++;
6563
6564 int j;
6565 GLuint max;
6566 GLuint unSubID;
6567 GLboolean bRet;
6568 for(j=0; j<pAsm->unSubArrayPointer; j++)
6569 {
6570 if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
6571 { /* compiled before */
6572
6573 max = pAsm->subs[j].unStackDepthMax
6574 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6575 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6576 {
6577 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6578 }
6579
6580 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
6581 return GL_TRUE;
6582 }
6583 }
6584
6585 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
6586 unSubID = pAsm->unSubArrayPointer;
6587
6588 bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
6589
6590 if(GL_TRUE == bRet)
6591 {
6592 max = pAsm->subs[unSubID].unStackDepthMax
6593 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6594 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6595 {
6596 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6597 }
6598
6599 pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
6600 }
6601
6602 return bRet;
6603 }
6604
6605 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
6606 {
6607 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6608
6609 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6610 pAsm->D.dst.op3 = 0;
6611 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6612 pAsm->D.dst.reg = pAsm->flag_reg_index;
6613 pAsm->D.dst.writex = 1;
6614 pAsm->D.dst.writey = 0;
6615 pAsm->D.dst.writez = 0;
6616 pAsm->D.dst.writew = 0;
6617 pAsm->D2.dst2.literal_slots = 1;
6618 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6619 pAsm->D.dst.predicated = 0;
6620 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
6621 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
6622 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
6623 #if 0
6624 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
6625 //pAsm->S[0].src.reg = 0;
6626 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6627 noneg_PVSSRC(&(pAsm->S[0].src));
6628 pAsm->S[0].src.swizzlex = SQ_SEL_X;
6629 pAsm->S[0].src.swizzley = SQ_SEL_Y;
6630 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6631 pAsm->S[0].src.swizzlew = SQ_SEL_W;
6632
6633 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
6634 {
6635 return GL_FALSE;
6636 }
6637 #else
6638 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6639 pAsm->S[0].src.reg = 0;
6640 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6641 noneg_PVSSRC(&(pAsm->S[0].src));
6642 pAsm->S[0].src.swizzlex = flagValue;
6643 pAsm->S[0].src.swizzley = flagValue;
6644 pAsm->S[0].src.swizzlez = flagValue;
6645 pAsm->S[0].src.swizzlew = flagValue;
6646
6647 if( GL_FALSE == next_ins(pAsm) )
6648 {
6649 return GL_FALSE;
6650 }
6651 #endif
6652
6653 return GL_TRUE;
6654 }
6655
6656 GLboolean testFlag(r700_AssemblerBase *pAsm)
6657 {
6658 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6659
6660 //Test flag
6661 GLuint tmp = gethelpr(pAsm);
6662 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6663
6664 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
6665 pAsm->D.dst.math = 1;
6666 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6667 pAsm->D.dst.reg = tmp;
6668 pAsm->D.dst.writex = 1;
6669 pAsm->D.dst.writey = 0;
6670 pAsm->D.dst.writez = 0;
6671 pAsm->D.dst.writew = 0;
6672 pAsm->D2.dst2.literal_slots = 1;
6673 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6674 pAsm->D.dst.predicated = 1;
6675 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
6676
6677 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6678 pAsm->S[0].src.reg = pAsm->flag_reg_index;
6679 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6680 noneg_PVSSRC(&(pAsm->S[0].src));
6681 pAsm->S[0].src.swizzlex = SQ_SEL_X;
6682 pAsm->S[0].src.swizzley = SQ_SEL_Y;
6683 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6684 pAsm->S[0].src.swizzlew = SQ_SEL_W;
6685 #if 0
6686 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
6687 //pAsm->S[1].src.reg = 0;
6688 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6689 noneg_PVSSRC(&(pAsm->S[1].src));
6690 pAsm->S[1].src.swizzlex = SQ_SEL_X;
6691 pAsm->S[1].src.swizzley = SQ_SEL_Y;
6692 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
6693 pAsm->S[1].src.swizzlew = SQ_SEL_W;
6694
6695 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
6696 {
6697 return GL_FALSE;
6698 }
6699 #else
6700 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
6701 pAsm->S[1].src.reg = 0;
6702 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6703 noneg_PVSSRC(&(pAsm->S[1].src));
6704 pAsm->S[1].src.swizzlex = SQ_SEL_1;
6705 pAsm->S[1].src.swizzley = SQ_SEL_1;
6706 pAsm->S[1].src.swizzlez = SQ_SEL_1;
6707 pAsm->S[1].src.swizzlew = SQ_SEL_1;
6708
6709 if( GL_FALSE == next_ins(pAsm) )
6710 {
6711 return GL_FALSE;
6712 }
6713 #endif
6714
6715 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6716
6717 return GL_TRUE;
6718 }
6719
6720 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
6721 {
6722 testFlag(pAsm);
6723 jumpToOffest(pAsm, 1, 4);
6724 setRetInLoopFlag(pAsm, SQ_SEL_0);
6725 pops(pAsm, unIF + 1);
6726 add_return_inst(pAsm);
6727
6728 return GL_TRUE;
6729 }
6730
6731 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
6732 {
6733 testFlag(pAsm);
6734
6735 //break
6736 if(GL_FALSE == add_cf_instruction(pAsm) )
6737 {
6738 return GL_FALSE;
6739 }
6740
6741 if(8 == pAsm->unAsic)
6742 {
6743 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6744 1,
6745 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6746 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6747 EG_CF_INST_LOOP_BREAK,
6748 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6749 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6750 0,
6751 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6752 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6753 SQ_CF_COND_ACTIVE,
6754 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6755 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6756 0,
6757 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6758 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6759 0,
6760 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6761 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6762 0,
6763 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6764 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6765 1,
6766 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6767 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6768 1,
6769 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6770 }
6771 else
6772 {
6773 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6774 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6775 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6776
6777 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6778 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6779 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
6780 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6781
6782 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6783 }
6784
6785 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6786 (void *)pAsm->fc_stack[unFCSP].mid,
6787 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6788 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6789 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6790 pAsm->fc_stack[unFCSP].unNumMid++;
6791
6792 pops(pAsm, 1);
6793
6794 return GL_TRUE;
6795 }
6796
6797 GLboolean AssembleInstr(GLuint uiFirstInst,
6798 GLuint uiIL_Shift,
6799 GLuint uiNumberInsts,
6800 struct prog_instruction *pILInst,
6801 r700_AssemblerBase *pR700AsmCode)
6802 {
6803 GLuint i;
6804
6805 pR700AsmCode->pILInst = pILInst;
6806 for(i=uiFirstInst; i<uiNumberInsts; i++)
6807 {
6808 pR700AsmCode->uiCurInst = i;
6809
6810 #ifndef USE_CF_FOR_CONTINUE_BREAK
6811 if(OPCODE_BRK == pILInst[i+1].Opcode)
6812 {
6813 switch(pILInst[i].Opcode)
6814 {
6815 case OPCODE_SLE:
6816 pILInst[i].Opcode = OPCODE_SGT;
6817 break;
6818 case OPCODE_SLT:
6819 pILInst[i].Opcode = OPCODE_SGE;
6820 break;
6821 case OPCODE_SGE:
6822 pILInst[i].Opcode = OPCODE_SLT;
6823 break;
6824 case OPCODE_SGT:
6825 pILInst[i].Opcode = OPCODE_SLE;
6826 break;
6827 case OPCODE_SEQ:
6828 pILInst[i].Opcode = OPCODE_SNE;
6829 break;
6830 case OPCODE_SNE:
6831 pILInst[i].Opcode = OPCODE_SEQ;
6832 break;
6833 default:
6834 break;
6835 }
6836 }
6837 #endif
6838 if(pILInst[i].CondUpdate == 1)
6839 {
6840 /* remember dest register used for cond evaluation */
6841 /* XXX also handle PROGRAM_OUTPUT registers here? */
6842 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
6843 }
6844
6845 switch (pILInst[i].Opcode)
6846 {
6847 case OPCODE_ABS:
6848 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
6849 return GL_FALSE;
6850 break;
6851 case OPCODE_ADD:
6852 case OPCODE_SUB:
6853 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
6854 return GL_FALSE;
6855 break;
6856
6857 case OPCODE_ARL:
6858 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
6859 return GL_FALSE;
6860 break;
6861 case OPCODE_ARR:
6862 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
6863 //if ( GL_FALSE == assemble_BAD("ARR") )
6864 return GL_FALSE;
6865 break;
6866
6867 case OPCODE_CMP:
6868 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
6869 return GL_FALSE;
6870 break;
6871 case OPCODE_COS:
6872 if(8 == pR700AsmCode->unAsic)
6873 {
6874 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_COS) )
6875 return GL_FALSE;
6876 }
6877 else
6878 {
6879 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
6880 return GL_FALSE;
6881 }
6882 break;
6883
6884 case OPCODE_DP2:
6885 case OPCODE_DP3:
6886 case OPCODE_DP4:
6887 case OPCODE_DPH:
6888 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
6889 return GL_FALSE;
6890 break;
6891
6892 case OPCODE_DST:
6893 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
6894 return GL_FALSE;
6895 break;
6896
6897 case OPCODE_EX2:
6898 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
6899 return GL_FALSE;
6900 break;
6901 case OPCODE_EXP:
6902 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
6903 return GL_FALSE;
6904 break;
6905
6906 case OPCODE_FLR:
6907 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
6908 return GL_FALSE;
6909 break;
6910 //case OP_FLR_INT: ;
6911
6912 // if ( GL_FALSE == assemble_FLR_INT() )
6913 // return GL_FALSE;
6914 // break;
6915
6916 case OPCODE_FRC:
6917 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
6918 return GL_FALSE;
6919 break;
6920
6921 case OPCODE_KIL:
6922 case OPCODE_KIL_NV:
6923 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
6924 return GL_FALSE;
6925 break;
6926 case OPCODE_LG2:
6927 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
6928 return GL_FALSE;
6929 break;
6930 case OPCODE_LIT:
6931 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
6932 return GL_FALSE;
6933 break;
6934 case OPCODE_LRP:
6935 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
6936 return GL_FALSE;
6937 break;
6938 case OPCODE_LOG:
6939 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
6940 return GL_FALSE;
6941 break;
6942
6943 case OPCODE_MAD:
6944 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
6945 return GL_FALSE;
6946 break;
6947 case OPCODE_MAX:
6948 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
6949 return GL_FALSE;
6950 break;
6951 case OPCODE_MIN:
6952 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
6953 return GL_FALSE;
6954 break;
6955
6956 case OPCODE_MOV:
6957 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
6958 return GL_FALSE;
6959 break;
6960 case OPCODE_MUL:
6961 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
6962 return GL_FALSE;
6963 break;
6964
6965 case OPCODE_NOISE1:
6966 {
6967 callPreSub(pR700AsmCode,
6968 GLSL_NOISE1,
6969 &noise1_presub,
6970 pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
6971 1);
6972 radeon_error("noise1: not yet supported shader instruction\n");
6973 };
6974 break;
6975 case OPCODE_NOISE2:
6976 radeon_error("noise2: not yet supported shader instruction\n");
6977 break;
6978 case OPCODE_NOISE3:
6979 radeon_error("noise3: not yet supported shader instruction\n");
6980 break;
6981 case OPCODE_NOISE4:
6982 radeon_error("noise4: not yet supported shader instruction\n");
6983 break;
6984
6985 case OPCODE_POW:
6986 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
6987 return GL_FALSE;
6988 break;
6989 case OPCODE_RCP:
6990 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
6991 return GL_FALSE;
6992 break;
6993 case OPCODE_RSQ:
6994 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
6995 return GL_FALSE;
6996 break;
6997 case OPCODE_SIN:
6998 if(8 == pR700AsmCode->unAsic)
6999 {
7000 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_SIN) )
7001 return GL_FALSE;
7002 }
7003 else
7004 {
7005 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
7006 return GL_FALSE;
7007 }
7008 break;
7009 case OPCODE_SCS:
7010 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
7011 return GL_FALSE;
7012 break;
7013
7014 case OPCODE_SEQ:
7015 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
7016 {
7017 return GL_FALSE;
7018 }
7019 break;
7020
7021 case OPCODE_SGT:
7022 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
7023 {
7024 return GL_FALSE;
7025 }
7026 break;
7027
7028 case OPCODE_SGE:
7029 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
7030 {
7031 return GL_FALSE;
7032 }
7033 break;
7034
7035 /* NO LT, LE, TODO : use GE => LE, GT => LT : reversing the order of the 2 srcs would be simplest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
7036 case OPCODE_SLT:
7037 {
7038 struct prog_src_register SrcRegSave[2];
7039 SrcRegSave[0] = pILInst[i].SrcReg[0];
7040 SrcRegSave[1] = pILInst[i].SrcReg[1];
7041 pILInst[i].SrcReg[0] = SrcRegSave[1];
7042 pILInst[i].SrcReg[1] = SrcRegSave[0];
7043 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
7044 {
7045 pILInst[i].SrcReg[0] = SrcRegSave[0];
7046 pILInst[i].SrcReg[1] = SrcRegSave[1];
7047 return GL_FALSE;
7048 }
7049 pILInst[i].SrcReg[0] = SrcRegSave[0];
7050 pILInst[i].SrcReg[1] = SrcRegSave[1];
7051 }
7052 break;
7053
7054 case OPCODE_SLE:
7055 {
7056 struct prog_src_register SrcRegSave[2];
7057 SrcRegSave[0] = pILInst[i].SrcReg[0];
7058 SrcRegSave[1] = pILInst[i].SrcReg[1];
7059 pILInst[i].SrcReg[0] = SrcRegSave[1];
7060 pILInst[i].SrcReg[1] = SrcRegSave[0];
7061 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
7062 {
7063 pILInst[i].SrcReg[0] = SrcRegSave[0];
7064 pILInst[i].SrcReg[1] = SrcRegSave[1];
7065 return GL_FALSE;
7066 }
7067 pILInst[i].SrcReg[0] = SrcRegSave[0];
7068 pILInst[i].SrcReg[1] = SrcRegSave[1];
7069 }
7070 break;
7071
7072 case OPCODE_SNE:
7073 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
7074 {
7075 return GL_FALSE;
7076 }
7077 break;
7078
7079 //case OP_STP:
7080 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
7081 // return GL_FALSE;
7082 // break;
7083
7084 case OPCODE_SSG:
7085 if ( GL_FALSE == assemble_SSG(pR700AsmCode) )
7086 {
7087 return GL_FALSE;
7088 }
7089 break;
7090
7091 case OPCODE_SWZ:
7092 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
7093 {
7094 return GL_FALSE;
7095 }
7096 else
7097 {
7098 if( (i+1)<uiNumberInsts )
7099 {
7100 if(OPCODE_END != pILInst[i+1].Opcode)
7101 {
7102 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
7103 {
7104 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
7105 }
7106 }
7107 }
7108 }
7109 break;
7110 case OPCODE_DDX:
7111 case OPCODE_DDY:
7112 case OPCODE_TEX:
7113 case OPCODE_TXB:
7114 case OPCODE_TXL:
7115 case OPCODE_TXP:
7116 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
7117 return GL_FALSE;
7118 break;
7119
7120 case OPCODE_TRUNC:
7121 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
7122 return GL_FALSE;
7123 break;
7124
7125 case OPCODE_XPD:
7126 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
7127 return GL_FALSE;
7128 break;
7129
7130 case OPCODE_IF:
7131 {
7132 GLboolean bHasElse = GL_FALSE;
7133
7134 if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
7135 {
7136 bHasElse = GL_TRUE;
7137 }
7138
7139 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
7140 {
7141 return GL_FALSE;
7142 }
7143 }
7144 break;
7145
7146 case OPCODE_ELSE :
7147 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
7148 return GL_FALSE;
7149 break;
7150
7151 case OPCODE_ENDIF:
7152 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
7153 return GL_FALSE;
7154 break;
7155
7156 case OPCODE_BGNLOOP:
7157 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
7158 {
7159 return GL_FALSE;
7160 }
7161 break;
7162
7163 case OPCODE_BRK:
7164 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
7165 {
7166 return GL_FALSE;
7167 }
7168 break;
7169
7170 case OPCODE_CONT:
7171 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
7172 {
7173 return GL_FALSE;
7174 }
7175 break;
7176
7177 case OPCODE_ENDLOOP:
7178 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
7179 {
7180 return GL_FALSE;
7181 }
7182 break;
7183
7184 case OPCODE_BGNSUB:
7185 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
7186 {
7187 return GL_FALSE;
7188 }
7189 break;
7190
7191 case OPCODE_RET:
7192 if( GL_FALSE == assemble_RET(pR700AsmCode) )
7193 {
7194 return GL_FALSE;
7195 }
7196 break;
7197
7198 case OPCODE_CAL:
7199 if( GL_FALSE == assemble_CAL(pR700AsmCode,
7200 pILInst[i].BranchTarget,
7201 uiIL_Shift,
7202 uiNumberInsts,
7203 pILInst,
7204 NULL) )
7205 {
7206 return GL_FALSE;
7207 }
7208 break;
7209
7210 //case OPCODE_EXPORT:
7211 // if ( GL_FALSE == assemble_EXPORT() )
7212 // return GL_FALSE;
7213 // break;
7214
7215 case OPCODE_ENDSUB:
7216 return assemble_ENDSUB(pR700AsmCode);
7217
7218 case OPCODE_END:
7219 //pR700AsmCode->uiCurInst = i;
7220 //This is to remind us that if a later export contains a depth/stencil
7221 //export, we need a mov to re-arrange the DST channel; where a
7222 //pseudo inst would be used, we will use this END inst to do it.
7223 return GL_TRUE;
7224
7225 default:
7226 radeon_error("r600: unknown instruction %d\n", pILInst[i].Opcode);
7227 return GL_FALSE;
7228 }
7229 }
7230
7231 return GL_TRUE;
7232 }
7233
7234 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
7235 {
7236 #ifndef GENERATE_SHADER_FOR_2D
7237 setRetInLoopFlag(pAsm, SQ_SEL_0);
7238 #endif
7239
7240 if((SPT_FP == pAsm->currentShaderType) && (8 == pAsm->unAsic))
7241 {
7242 EG_add_ps_interp(pAsm);
7243 }
7244
7245 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7246 return GL_TRUE;
7247 }
7248
7249 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
7250 {
7251 GLuint i;
7252 GLuint unCFoffset;
7253 TypedShaderList * plstCFmain;
7254 TypedShaderList * plstCFsub;
7255
7256 R700ShaderInstruction * pInst;
7257 R700ControlFlowGenericClause * pCFInst;
7258
7259 R700ControlFlowALUClause * pCF_ALU;
7260 R700ALUInstruction * pALU;
7261 GLuint unConstOffset = 0;
7262 GLuint unRegOffset;
7263 GLuint unMinRegIndex;
7264
7265 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
7266
7267 #ifndef GENERATE_SHADER_FOR_2D
7268 /* remove flags init if they are not used */
7269 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
7270 {
7271 R700ControlFlowALUClause * pCF_ALU;
7272 pInst = plstCFmain->pHead;
7273 while(pInst)
7274 {
7275 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7276 {
7277 pCF_ALU = (R700ControlFlowALUClause *)pInst;
7278 if(0 == pCF_ALU->m_Word1.f.count)
7279 {
7280 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
7281 }
7282 else
7283 {
7284 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
7285
7286 pALU->m_pLinkedALUClause = NULL;
7287 pALU = (R700ALUInstruction *)(pALU->pNextInst);
7288 pALU->m_pLinkedALUClause = pCF_ALU;
7289 pCF_ALU->m_pLinkedALUInstruction = pALU;
7290
7291 pCF_ALU->m_Word1.f.count--;
7292 }
7293 break;
7294 }
7295 pInst = pInst->pNextInst;
7296 };
7297 }
7298 #endif /* GENERATE_SHADER_FOR_2D */
7299
7300 if(pAsm->CALLSTACK[0].max > 0)
7301 {
7302 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
7303 }
7304
7305 if(0 == pAsm->unSubArrayPointer)
7306 {
7307 return GL_TRUE;
7308 }
7309
7310 unCFoffset = plstCFmain->uNumOfNode;
7311
7312 if(NULL != pILProg->Parameters)
7313 {
7314 unConstOffset = pILProg->Parameters->NumParameters;
7315 }
7316
7317 /* Reloc subs */
7318 for(i=0; i<pAsm->unSubArrayPointer; i++)
7319 {
7320 pAsm->subs[i].unCFoffset = unCFoffset;
7321 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
7322
7323 pInst = plstCFsub->pHead;
7324
7325 /* reloc instructions */
7326 while(pInst)
7327 {
7328 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
7329 {
7330 pCFInst = (R700ControlFlowGenericClause *)pInst;
7331
7332 switch (pCFInst->m_Word1.f.cf_inst)
7333 {
7334 case SQ_CF_INST_POP:
7335 case SQ_CF_INST_JUMP:
7336 case SQ_CF_INST_ELSE:
7337 case SQ_CF_INST_LOOP_END:
7338 case SQ_CF_INST_LOOP_START:
7339 case SQ_CF_INST_LOOP_START_NO_AL:
7340 case SQ_CF_INST_LOOP_CONTINUE:
7341 case SQ_CF_INST_LOOP_BREAK:
7342 pCFInst->m_Word0.f.addr += unCFoffset;
7343 break;
7344 default:
7345 break;
7346 }
7347 }
7348
7349 pInst->m_uIndex += unCFoffset;
7350
7351 pInst = pInst->pNextInst;
7352 };
7353
7354 if(NULL != pAsm->subs[i].pPresubDesc)
7355 {
7356 GLuint uNumSrc;
7357
7358 unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
7359 unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
7360 unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
7361
7362 pInst = plstCFsub->pHead;
7363 while(pInst)
7364 {
7365 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7366 {
7367 pCF_ALU = (R700ControlFlowALUClause *)pInst;
7368
7369 pALU = pCF_ALU->m_pLinkedALUInstruction;
7370 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7371 {
7372 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7373
7374 if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
7375 {
7376 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7377 }
7378 else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
7379 {
7380 pALU->m_Word0.f.src0_sel += unConstOffset;
7381 }
7382
7383 if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
7384 >= SQ_OP3_INST_MUL_LIT )
7385 { /* op3 : 3 srcs */
7386 if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
7387 {
7388 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
7389 }
7390 else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
7391 {
7392 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
7393 }
7394 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7395 {
7396 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7397 }
7398 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7399 {
7400 pALU->m_Word0.f.src1_sel += unConstOffset;
7401 }
7402 }
7403 else
7404 {
7405 if(8 == pAsm->unAsic)
7406 {
7407 uNumSrc = EG_GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7408 }
7409 else
7410 {
7411 if(pAsm->bR6xx)
7412 {
7413 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
7414 }
7415 else
7416 {
7417 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7418 }
7419 }
7420 if(2 == uNumSrc)
7421 { /* 2 srcs */
7422 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7423 {
7424 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7425 }
7426 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7427 {
7428 pALU->m_Word0.f.src1_sel += unConstOffset;
7429 }
7430 }
7431 }
7432 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7433 }
7434 }
7435 pInst = pInst->pNextInst;
7436 };
7437 }
7438
7439 /* Put sub into main */
7440 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
7441 plstCFmain->pTail = plstCFsub->pTail;
7442 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
7443
7444 unCFoffset += plstCFsub->uNumOfNode;
7445 }
7446
7447 /* reloc callers */
7448 for(i=0; i<pAsm->unCallerArrayPointer; i++)
7449 {
7450 pAsm->callers[i].cf_ptr->m_Word0.f.addr
7451 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
7452
7453 if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
7454 {
7455 unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
7456 unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
7457
7458 if(NULL != pAsm->callers[i].prelude_cf_ptr)
7459 {
7460 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
7461 pALU = pCF_ALU->m_pLinkedALUInstruction;
7462 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7463 {
7464 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7465 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7466 }
7467 }
7468 if(NULL != pAsm->callers[i].finale_cf_ptr)
7469 {
7470 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
7471 pALU = pCF_ALU->m_pLinkedALUInstruction;
7472 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7473 {
7474 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7475 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7476 }
7477 }
7478 }
7479 }
7480
7481 return GL_TRUE;
7482 }
7483
7484 GLboolean callPreSub(r700_AssemblerBase* pAsm,
7485 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
7486 COMPILED_SUB * pCompiledSub,
7487 GLshort uOutReg,
7488 GLshort uNumValidSrc)
7489 {
7490 /* save assemble context */
7491 GLuint starting_temp_register_number_save;
7492 GLuint number_used_registers_save;
7493 GLuint uFirstHelpReg_save;
7494 GLuint uHelpReg_save;
7495 GLuint uiCurInst_save;
7496 struct prog_instruction *pILInst_save;
7497 PRESUB_DESC * pPresubDesc;
7498 GLboolean bRet;
7499 int i;
7500
7501 R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
7502
7503 /* copy srcs to presub inputs */
7504 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7505 for(i=0; i<uNumValidSrc; i++)
7506 {
7507 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7508 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
7509 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
7510 pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
7511 pAsm->D.dst.writex = 1;
7512 pAsm->D.dst.writey = 1;
7513 pAsm->D.dst.writez = 1;
7514 pAsm->D.dst.writew = 1;
7515
7516 if( GL_FALSE == assemble_src(pAsm, i, 0) )
7517 {
7518 return GL_FALSE;
7519 }
7520
7521 next_ins(pAsm);
7522 }
7523 if(uNumValidSrc > 0)
7524 {
7525 prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7526 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7527 }
7528
7529 /* browse thro existing presubs. */
7530 for(i=0; i<pAsm->unNumPresub; i++)
7531 {
7532 if(pAsm->presubs[i].sptSigniture == scriptSigniture)
7533 {
7534 break;
7535 }
7536 }
7537
7538 if(i == pAsm->unNumPresub)
7539 { /* not loaded yet */
7540 /* save assemble context */
7541 number_used_registers_save = pAsm->number_used_registers;
7542 uFirstHelpReg_save = pAsm->uFirstHelpReg;
7543 uHelpReg_save = pAsm->uHelpReg;
7544 starting_temp_register_number_save = pAsm->starting_temp_register_number;
7545 pILInst_save = pAsm->pILInst;
7546 uiCurInst_save = pAsm->uiCurInst;
7547
7548 /* alloc in presub */
7549 if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
7550 {
7551 pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
7552 sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
7553 sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
7554 if(NULL == pAsm->presubs)
7555 {
7556 radeon_error("No memeory to allocate built in shader function description structures. \n");
7557 return GL_FALSE;
7558 }
7559 pAsm->unPresubArraySize += 4;
7560 }
7561
7562 pPresubDesc = &(pAsm->presubs[i]);
7563 pPresubDesc->sptSigniture = scriptSigniture;
7564
7565 /* constants offsets need to be final resolved at reloc. */
7566 if(0 == pAsm->unNumPresub)
7567 {
7568 pPresubDesc->unConstantsStart = 0;
7569 }
7570 else
7571 {
7572 pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
7573 + pAsm->presubs[i-1].pCompiledSub->NumParameters;
7574 }
7575
7576 pPresubDesc->pCompiledSub = pCompiledSub;
7577
7578 pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
7579 pPresubDesc->maxStartReg = uFirstHelpReg_save;
7580 pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
7581
7582 pAsm->unNumPresub++;
7583
7584 /* setup new assemble context */
7585 pAsm->starting_temp_register_number = 0;
7586 pAsm->number_used_registers = pCompiledSub->NumTemporaries;
7587 pAsm->uFirstHelpReg = pAsm->number_used_registers;
7588 pAsm->uHelpReg = pAsm->uFirstHelpReg;
7589
7590 bRet = assemble_CAL(pAsm,
7591 0,
7592 pPresubDesc->subIL_Shift,
7593 pCompiledSub->NumInstructions,
7594 pCompiledSub->Instructions,
7595 pPresubDesc);
7596
7597
7598 pPresubDesc->number_used_registers = pAsm->number_used_registers;
7599
7600 /* restore assemble context */
7601 pAsm->number_used_registers = number_used_registers_save;
7602 pAsm->uFirstHelpReg = uFirstHelpReg_save;
7603 pAsm->uHelpReg = uHelpReg_save;
7604 pAsm->starting_temp_register_number = starting_temp_register_number_save;
7605 pAsm->pILInst = pILInst_save;
7606 pAsm->uiCurInst = uiCurInst_save;
7607 }
7608 else
7609 { /* was loaded */
7610 pPresubDesc = &(pAsm->presubs[i]);
7611
7612 bRet = assemble_CAL(pAsm,
7613 0,
7614 pPresubDesc->subIL_Shift,
7615 pCompiledSub->NumInstructions,
7616 pCompiledSub->Instructions,
7617 pPresubDesc);
7618 }
7619
7620 if(GL_FALSE == bRet)
7621 {
7622 radeon_error("Shader presub assemble failed. \n");
7623 }
7624 else
7625 {
7626 /* copy presub output to real dst */
7627 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7628 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7629
7630 if( GL_FALSE == assemble_dst(pAsm) )
7631 {
7632 return GL_FALSE;
7633 }
7634
7635 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
7636 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
7637 pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
7638 pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
7639 pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
7640 pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
7641 pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
7642
7643 next_ins(pAsm);
7644
7645 pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7646 pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
7647 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7648 }
7649
7650 if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
7651 {
7652 pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
7653 }
7654 if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
7655 {
7656 pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
7657 }
7658
7659 return bRet;
7660 }
7661
7662 GLboolean Process_Export(r700_AssemblerBase* pAsm,
7663 GLuint type,
7664 GLuint export_starting_index,
7665 GLuint export_count,
7666 GLuint starting_register_number,
7667 GLboolean is_depth_export)
7668 {
7669 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
7670 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
7671
7672 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
7673
7674 switch (type)
7675 {
7676 case SQ_EXPORT_PIXEL:
7677 if(GL_TRUE == is_depth_export)
7678 {
7679 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
7680 }
7681 else
7682 {
7683 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
7684 }
7685 break;
7686
7687 case SQ_EXPORT_POS:
7688 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
7689 break;
7690
7691 case SQ_EXPORT_PARAM:
7692 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
7693 break;
7694
7695 default:
7696 radeon_error("Unknown export type: %d\n", type);
7697 return GL_FALSE;
7698 break;
7699 }
7700
7701 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
7702
7703 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
7704 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
7705 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
7706
7707 if(8 == pAsm->unAsic)
7708 {
7709 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7710 (export_count - 1),
7711 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift,
7712 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask);
7713 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7714 0,
7715 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7716 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7717 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7718 0,
7719 EG_CF_ALLOC_EXPORT_WORD1__VPM_shift,
7720 EG_CF_ALLOC_EXPORT_WORD1__VPM_bit);
7721 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7722 EG_CF_INST_EXPORT,
7723 EG_CF_WORD1__CF_INST_shift,
7724 EG_CF_WORD1__CF_INST_mask);
7725 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7726 0,
7727 EG_CF_ALLOC_EXPORT_WORD1__MARK_shift,
7728 EG_CF_ALLOC_EXPORT_WORD1__MARK_bit);
7729 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7730 1,
7731 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift,
7732 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit);
7733 }
7734 else
7735 {
7736 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
7737 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
7738 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
7739 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
7740 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
7741 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
7742 }
7743
7744 if (export_count == 1)
7745 {
7746 assert(starting_register_number >= pAsm->starting_export_register_number);
7747
7748 /* exports Z as a float into Red channel */
7749 if (GL_TRUE == is_depth_export)
7750 {
7751 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_Z;
7752 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
7753 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
7754 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
7755 }
7756 else
7757 {
7758 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7759 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7760 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7761 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7762 }
7763 }
7764 else
7765 {
7766 // This should only be used if all components for all registers have been written
7767 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7768 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7769 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7770 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7771 }
7772
7773 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
7774
7775 return GL_TRUE;
7776 }
7777
7778 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
7779 GLbitfield OutputsWritten)
7780 {
7781 unsigned int unBit;
7782 GLuint export_count = 0;
7783 unsigned int i;
7784
7785 for (i = 0; i < FRAG_RESULT_MAX; ++i)
7786 {
7787 unBit = 1 << i;
7788
7789 if (OutputsWritten & unBit)
7790 {
7791 GLboolean is_depth = i == FRAG_RESULT_DEPTH ? GL_TRUE : GL_FALSE;
7792 if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth))
7793 return GL_FALSE;
7794 ++export_count;
7795 }
7796 }
7797
7798 /* Need to export something, otherwise we'll hang
7799 * results are undefined anyway */
7800 if(export_count == 0)
7801 {
7802 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
7803 }
7804
7805 if(pR700AsmCode->cf_last_export_ptr != NULL)
7806 {
7807 if(8 == pR700AsmCode->unAsic)
7808 {
7809 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7810 1,
7811 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7812 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7813 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7814 EG_CF_INST_EXPORT_DONE,
7815 EG_CF_WORD1__CF_INST_shift,
7816 EG_CF_WORD1__CF_INST_mask);
7817 }
7818 else
7819 {
7820 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7821 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
7822 }
7823 }
7824
7825 return GL_TRUE;
7826 }
7827
7828 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
7829 GLbitfield OutputsWritten)
7830 {
7831 unsigned int unBit;
7832 unsigned int i;
7833
7834 GLuint export_starting_index = 0;
7835 GLuint export_count = pR700AsmCode->number_of_exports;
7836
7837 unBit = 1 << VERT_RESULT_HPOS;
7838 if(OutputsWritten & unBit)
7839 {
7840 if( GL_FALSE == Process_Export(pR700AsmCode,
7841 SQ_EXPORT_POS,
7842 export_starting_index,
7843 1,
7844 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
7845 GL_FALSE) )
7846 {
7847 return GL_FALSE;
7848 }
7849 export_starting_index++;
7850 export_count--;
7851 }
7852
7853 unBit = 1 << VERT_RESULT_PSIZ;
7854 if(OutputsWritten & unBit)
7855 {
7856 if( GL_FALSE == Process_Export(pR700AsmCode,
7857 SQ_EXPORT_POS,
7858 export_starting_index,
7859 1,
7860 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ],
7861 GL_FALSE) )
7862 {
7863 return GL_FALSE;
7864 }
7865 export_count--;
7866 }
7867
7868 if(8 == pR700AsmCode->unAsic)
7869 {
7870 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7871 EG_CF_INST_EXPORT_DONE,
7872 EG_CF_WORD1__CF_INST_shift,
7873 EG_CF_WORD1__CF_INST_mask);
7874 }
7875 else
7876 {
7877 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7878 }
7879
7880
7881 pR700AsmCode->number_of_exports = export_count;
7882 export_starting_index = 0;
7883
7884 unBit = 1 << VERT_RESULT_COL0;
7885 if(OutputsWritten & unBit)
7886 {
7887 if( GL_FALSE == Process_Export(pR700AsmCode,
7888 SQ_EXPORT_PARAM,
7889 export_starting_index,
7890 1,
7891 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
7892 GL_FALSE) )
7893 {
7894 return GL_FALSE;
7895 }
7896
7897 export_starting_index++;
7898 }
7899
7900 unBit = 1 << VERT_RESULT_COL1;
7901 if(OutputsWritten & unBit)
7902 {
7903 if( GL_FALSE == Process_Export(pR700AsmCode,
7904 SQ_EXPORT_PARAM,
7905 export_starting_index,
7906 1,
7907 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
7908 GL_FALSE) )
7909 {
7910 return GL_FALSE;
7911 }
7912
7913 export_starting_index++;
7914 }
7915
7916 unBit = 1 << VERT_RESULT_FOGC;
7917 if(OutputsWritten & unBit)
7918 {
7919 if( GL_FALSE == Process_Export(pR700AsmCode,
7920 SQ_EXPORT_PARAM,
7921 export_starting_index,
7922 1,
7923 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
7924 GL_FALSE) )
7925 {
7926 return GL_FALSE;
7927 }
7928
7929 export_starting_index++;
7930 }
7931
7932 for(i=0; i<8; i++)
7933 {
7934 unBit = 1 << (VERT_RESULT_TEX0 + i);
7935 if(OutputsWritten & unBit)
7936 {
7937 if( GL_FALSE == Process_Export(pR700AsmCode,
7938 SQ_EXPORT_PARAM,
7939 export_starting_index,
7940 1,
7941 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
7942 GL_FALSE) )
7943 {
7944 return GL_FALSE;
7945 }
7946
7947 export_starting_index++;
7948 }
7949 }
7950
7951 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
7952 {
7953 unBit = 1 << i;
7954 if(OutputsWritten & unBit)
7955 {
7956 if( GL_FALSE == Process_Export(pR700AsmCode,
7957 SQ_EXPORT_PARAM,
7958 export_starting_index,
7959 1,
7960 pR700AsmCode->ucVP_OutputMap[i],
7961 GL_FALSE) )
7962 {
7963 return GL_FALSE;
7964 }
7965
7966 export_starting_index++;
7967 }
7968 }
7969
7970 // At least one param should be exported
7971 if (export_count)
7972 {
7973 if(8 == pR700AsmCode->unAsic)
7974 {
7975 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7976 EG_CF_INST_EXPORT_DONE,
7977 EG_CF_WORD1__CF_INST_shift,
7978 EG_CF_WORD1__CF_INST_mask);
7979 }
7980 else
7981 {
7982 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7983 }
7984 }
7985 else
7986 {
7987 if( GL_FALSE == Process_Export(pR700AsmCode,
7988 SQ_EXPORT_PARAM,
7989 0,
7990 1,
7991 pR700AsmCode->starting_export_register_number,
7992 GL_FALSE) )
7993 {
7994 return GL_FALSE;
7995 }
7996
7997 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
7998 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
7999 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
8000 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
8001 if(8 == pR700AsmCode->unAsic)
8002 {
8003 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
8004 EG_CF_INST_EXPORT_DONE,
8005 EG_CF_WORD1__CF_INST_shift,
8006 EG_CF_WORD1__CF_INST_mask);
8007 }
8008 else
8009 {
8010 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
8011 }
8012 }
8013
8014 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
8015
8016 return GL_TRUE;
8017 }
8018
8019 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
8020 {
8021 if(NULL != pR700AsmCode->pInstDeps)
8022 {
8023 FREE(pR700AsmCode->pInstDeps);
8024 pR700AsmCode->pInstDeps = NULL;
8025 }
8026
8027 if(NULL != pR700AsmCode->subs)
8028 {
8029 FREE(pR700AsmCode->subs);
8030 pR700AsmCode->subs = NULL;
8031 }
8032 if(NULL != pR700AsmCode->callers)
8033 {
8034 FREE(pR700AsmCode->callers);
8035 pR700AsmCode->callers = NULL;
8036 }
8037
8038 if(NULL != pR700AsmCode->presubs)
8039 {
8040 FREE(pR700AsmCode->presubs);
8041 pR700AsmCode->presubs = NULL;
8042 }
8043
8044 return GL_TRUE;
8045 }
8046