radeon: Remove set-but-unused variables in radeonSetTexBuffer2() variants.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "program/prog_parameter.h"
36
37 #include "radeon_debug.h"
38 #include "r600_context.h"
39
40 #include "r700_assembler.h"
41 #include "evergreen_sq.h"
42
/* Compile-time switches for control-flow code generation.
 * NOTE(review): the code that tests these macros is outside this part of
 * the file; presumably they select CF instructions for CONTINUE/BREAK and
 * for popping after a clause - confirm at the #if sites before changing. */
#define USE_CF_FOR_CONTINUE_BREAK 1
#define USE_CF_FOR_POP_AFTER 1
45
/* Hard-coded instruction sequence for the "noise1" subroutine: twelve
 * prog_instruction entries running from OPCODE_BGNSUB to OPCODE_ENDSUB.
 * noise1_presub points at the first entry.
 * NOTE(review): every row is a positional initialiser, so the meaning of
 * each number depends on the layout of struct prog_instruction (declared
 * outside this file) - check that declaration before editing a value. */
struct prog_instruction noise1_insts[12] = {
    {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
    {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
    {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
};
/* Floating-point constants that accompany noise1_insts; noise1_presub
 * points at row 0.
 * NOTE(review): the array is declared with two rows but only the first is
 * initialised, so row 1 is implicitly all zeros - confirm whether the
 * second row is intentional. */
float noise1_const[2][4] = {
    {0.300000f, 0.900000f, 0.500000f, 0.300000f}
};
63
/* Descriptor that ties together the noise1 instruction table and its
 * constants.
 * NOTE(review): COMPILED_SUB is declared outside this file, so the
 * positional values below must be read against that declaration; the 12
 * matches the number of entries in noise1_insts. */
COMPILED_SUB noise1_presub = {
    &(noise1_insts[0]),
    12,
    2,
    1,
    0,
    &(noise1_const[0]),
    SWIZZLE_X,
    SWIZZLE_X,
    SWIZZLE_X,
    SWIZZLE_X,
    {0,0,0},
    0
};
78
79 BITS addrmode_PVSDST(PVSDST * pPVSDST)
80 {
81 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
82 }
83
84 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
85 {
86 pPVSDST->addrmode0 = addrmode & 1;
87 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
88 }
89
90 void nomask_PVSDST(PVSDST * pPVSDST)
91 {
92 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
93 }
94
95 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
96 {
97 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
98 }
99
100 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
101 {
102 pPVSSRC->addrmode0 = addrmode & 1;
103 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
104 }
105
106
107 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
108 {
109 pPVSSRC->swizzlex =
110 pPVSSRC->swizzley =
111 pPVSSRC->swizzlez =
112 pPVSSRC->swizzlew = swz;
113 }
114
115 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
116 {
117 pPVSSRC->swizzlex = SQ_SEL_X;
118 pPVSSRC->swizzley = SQ_SEL_Y;
119 pPVSSRC->swizzlez = SQ_SEL_Z;
120 pPVSSRC->swizzlew = SQ_SEL_W;
121 }
122
123 void
124 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
125 {
126 switch (x)
127 {
128 case SQ_SEL_X: x = pPVSSRC->swizzlex;
129 break;
130 case SQ_SEL_Y: x = pPVSSRC->swizzley;
131 break;
132 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
133 break;
134 case SQ_SEL_W: x = pPVSSRC->swizzlew;
135 break;
136 default:;
137 }
138
139 switch (y)
140 {
141 case SQ_SEL_X: y = pPVSSRC->swizzlex;
142 break;
143 case SQ_SEL_Y: y = pPVSSRC->swizzley;
144 break;
145 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
146 break;
147 case SQ_SEL_W: y = pPVSSRC->swizzlew;
148 break;
149 default:;
150 }
151
152 switch (z)
153 {
154 case SQ_SEL_X: z = pPVSSRC->swizzlex;
155 break;
156 case SQ_SEL_Y: z = pPVSSRC->swizzley;
157 break;
158 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
159 break;
160 case SQ_SEL_W: z = pPVSSRC->swizzlew;
161 break;
162 default:;
163 }
164
165 switch (w)
166 {
167 case SQ_SEL_X: w = pPVSSRC->swizzlex;
168 break;
169 case SQ_SEL_Y: w = pPVSSRC->swizzley;
170 break;
171 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
172 break;
173 case SQ_SEL_W: w = pPVSSRC->swizzlew;
174 break;
175 default:;
176 }
177
178 pPVSSRC->swizzlex = x;
179 pPVSSRC->swizzley = y;
180 pPVSSRC->swizzlez = z;
181 pPVSSRC->swizzlew = w;
182 }
183
184 void neg_PVSSRC(PVSSRC* pPVSSRC)
185 {
186 pPVSSRC->negx = 1;
187 pPVSSRC->negy = 1;
188 pPVSSRC->negz = 1;
189 pPVSSRC->negw = 1;
190 }
191
192 void noneg_PVSSRC(PVSSRC* pPVSSRC)
193 {
194 pPVSSRC->negx = 0;
195 pPVSSRC->negy = 0;
196 pPVSSRC->negz = 0;
197 pPVSSRC->negw = 0;
198 }
199
200 // negate argument (for SUB instead of ADD and alike)
201 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
202 {
203 pPVSSRC->negx = !pPVSSRC->negx;
204 pPVSSRC->negy = !pPVSSRC->negy;
205 pPVSSRC->negz = !pPVSSRC->negz;
206 pPVSSRC->negw = !pPVSSRC->negw;
207 }
208
209 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
210 {
211 switch (c)
212 {
213 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
214 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
215 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
216 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
217 default:;
218 }
219 }
220
221 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
222 {
223 switch (c)
224 {
225 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
226 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
227 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
228 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
229 default:;
230 }
231 }
232
233 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
234 {
235 return (pOutVTXFmt0->point_size |
236 pOutVTXFmt0->edge_flag |
237 pOutVTXFmt0->rta_index |
238 pOutVTXFmt0->kill_flag |
239 pOutVTXFmt0->viewport_index);
240 }
241
242 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
243 {
244 return (pFPOutFmt->depth |
245 pFPOutFmt->stencil_ref |
246 pFPOutFmt->mask |
247 pFPOutFmt->coverage_to_mask);
248 }
249
250 GLboolean is_reduction_opcode(PVSDWORD* dest)
251 {
252 if (dest->dst.op3 == 0)
253 {
254 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
255 {
256 return GL_TRUE;
257 }
258 }
259 return GL_FALSE;
260 }
261
#if 0 /* unused */
/* EG_* counterpart of is_reduction_opcode(): GL_TRUE when dest encodes
 * DOT4, DOT4_IEEE or CUBE with the op3 flag clear.  Compiled out. */
GLboolean EG_is_reduction_opcode(PVSDWORD* dest)
{
    if (dest->dst.op3 != 0)
    {
        return GL_FALSE;
    }

    if (dest->dst.opcode == EG_OP2_INST_DOT4)
    {
        return GL_TRUE;
    }
    if (dest->dst.opcode == EG_OP2_INST_DOT4_IEEE)
    {
        return GL_TRUE;
    }
    if (dest->dst.opcode == EG_OP2_INST_CUBE)
    {
        return GL_TRUE;
    }

    return GL_FALSE;
}
#endif
275
276 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
277 {
278 GLuint format = FMT_INVALID;
279 GLuint uiElemSize = 0;
280
281 switch (eType)
282 {
283 case GL_BYTE:
284 case GL_UNSIGNED_BYTE:
285 uiElemSize = 1;
286 switch(nChannels)
287 {
288 case 1:
289 format = FMT_8; break;
290 case 2:
291 format = FMT_8_8; break;
292 case 3:
293 /* for some (small/unaligned) strides using 4 comps works
294 * better, probably same as GL_SHORT below
295 * test piglit/draw-vertices */
296 format = FMT_8_8_8_8; break;
297 case 4:
298 format = FMT_8_8_8_8; break;
299 default:
300 break;
301 }
302 break;
303
304 case GL_UNSIGNED_SHORT:
305 case GL_SHORT:
306 uiElemSize = 2;
307 switch(nChannels)
308 {
309 case 1:
310 format = FMT_16; break;
311 case 2:
312 format = FMT_16_16; break;
313 case 3:
314 /* 3 comp GL_SHORT vertex format doesnt work on r700
315 4 somehow works, test - sauerbraten */
316 format = FMT_16_16_16_16; break;
317 case 4:
318 format = FMT_16_16_16_16; break;
319 default:
320 break;
321 }
322 break;
323
324 case GL_UNSIGNED_INT:
325 case GL_INT:
326 uiElemSize = 4;
327 switch(nChannels)
328 {
329 case 1:
330 format = FMT_32; break;
331 case 2:
332 format = FMT_32_32; break;
333 case 3:
334 format = FMT_32_32_32; break;
335 case 4:
336 format = FMT_32_32_32_32; break;
337 default:
338 break;
339 }
340 break;
341
342 case GL_FLOAT:
343 uiElemSize = 4;
344 switch(nChannels)
345 {
346 case 1:
347 format = FMT_32_FLOAT; break;
348 case 2:
349 format = FMT_32_32_FLOAT; break;
350 case 3:
351 format = FMT_32_32_32_FLOAT; break;
352 case 4:
353 format = FMT_32_32_32_32_FLOAT; break;
354 default:
355 break;
356 }
357 break;
358 case GL_DOUBLE:
359 uiElemSize = 8;
360 switch(nChannels)
361 {
362 case 1:
363 format = FMT_32_FLOAT; break;
364 case 2:
365 format = FMT_32_32_FLOAT; break;
366 case 3:
367 format = FMT_32_32_32_FLOAT; break;
368 case 4:
369 format = FMT_32_32_32_32_FLOAT; break;
370 default:
371 break;
372 }
373 break;
374 default:
375 ;
376 //GL_ASSERT_NO_CASE();
377 }
378
379 if(NULL != pClient_size)
380 {
381 *pClient_size = uiElemSize * nChannels;
382 }
383
384 return(format);
385 }
386
387 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
388 {
389 if(nIsOp3 > 0)
390 {
391 return 3;
392 }
393
394 switch (opcode)
395 {
396 case SQ_OP2_INST_ADD:
397 case SQ_OP2_INST_KILLE:
398 case SQ_OP2_INST_KILLGT:
399 case SQ_OP2_INST_KILLGE:
400 case SQ_OP2_INST_KILLNE:
401 case SQ_OP2_INST_MUL:
402 case SQ_OP2_INST_MAX:
403 case SQ_OP2_INST_MIN:
404 //case SQ_OP2_INST_MAX_DX10:
405 //case SQ_OP2_INST_MIN_DX10:
406 case SQ_OP2_INST_SETE:
407 case SQ_OP2_INST_SETNE:
408 case SQ_OP2_INST_SETGT:
409 case SQ_OP2_INST_SETGE:
410 case SQ_OP2_INST_PRED_SETE:
411 case SQ_OP2_INST_PRED_SETGT:
412 case SQ_OP2_INST_PRED_SETGE:
413 case SQ_OP2_INST_PRED_SETNE:
414 case SQ_OP2_INST_DOT4:
415 case SQ_OP2_INST_DOT4_IEEE:
416 case SQ_OP2_INST_CUBE:
417 return 2;
418
419 case SQ_OP2_INST_MOV:
420 case SQ_OP2_INST_MOVA_FLOOR:
421 case SQ_OP2_INST_FRACT:
422 case SQ_OP2_INST_FLOOR:
423 case SQ_OP2_INST_TRUNC:
424 case SQ_OP2_INST_EXP_IEEE:
425 case SQ_OP2_INST_LOG_CLAMPED:
426 case SQ_OP2_INST_LOG_IEEE:
427 case SQ_OP2_INST_RECIP_IEEE:
428 case SQ_OP2_INST_RECIPSQRT_IEEE:
429 case SQ_OP2_INST_FLT_TO_INT:
430 case SQ_OP2_INST_SIN:
431 case SQ_OP2_INST_COS:
432 return 1;
433
434 default: radeon_error(
435 "Need instruction operand number for %x.\n", opcode);
436 };
437
438 return 3;
439 }
440
441 unsigned int EG_GetNumOperands(GLuint opcode, GLuint nIsOp3)
442 {
443 if(nIsOp3 > 0)
444 {
445 return 3;
446 }
447
448 switch (opcode)
449 {
450 case EG_OP2_INST_ADD:
451 case EG_OP2_INST_KILLE:
452 case EG_OP2_INST_KILLGT:
453 case EG_OP2_INST_KILLGE:
454 case EG_OP2_INST_KILLNE:
455 case EG_OP2_INST_MUL:
456 case EG_OP2_INST_MAX:
457 case EG_OP2_INST_MIN:
458 //case EG_OP2_INST_MAX_DX10:
459 //case EG_OP2_INST_MIN_DX10:
460 case EG_OP2_INST_SETE:
461 case EG_OP2_INST_SETNE:
462 case EG_OP2_INST_SETGT:
463 case EG_OP2_INST_SETGE:
464 case EG_OP2_INST_PRED_SETE:
465 case EG_OP2_INST_PRED_SETGT:
466 case EG_OP2_INST_PRED_SETGE:
467 case EG_OP2_INST_PRED_SETNE:
468 case EG_OP2_INST_DOT4:
469 case EG_OP2_INST_DOT4_IEEE:
470 case EG_OP2_INST_CUBE:
471 return 2;
472
473 case EG_OP2_INST_MOV:
474 //case SQ_OP2_INST_MOVA_FLOOR:
475 case EG_OP2_INST_FRACT:
476 case EG_OP2_INST_FLOOR:
477 case EG_OP2_INST_TRUNC:
478 case EG_OP2_INST_EXP_IEEE:
479 case EG_OP2_INST_LOG_CLAMPED:
480 case EG_OP2_INST_LOG_IEEE:
481 case EG_OP2_INST_RECIP_IEEE:
482 case EG_OP2_INST_RECIPSQRT_IEEE:
483 case EG_OP2_INST_FLT_TO_INT:
484 case EG_OP2_INST_SIN:
485 case EG_OP2_INST_COS:
486 case EG_OP2_INST_FLT_TO_INT_FLOOR:
487 case EG_OP2_INST_MOVA_INT:
488 return 1;
489
490 default: radeon_error(
491 "Need instruction operand number for %x.\n", opcode);
492 };
493
494 return 3;
495 }
496
497 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
498 {
499 GLuint i;
500
501 Init_R700_Shader(pShader);
502 pAsm->pR700Shader = pShader;
503 pAsm->currentShaderType = spt;
504
505 pAsm->cf_last_export_ptr = NULL;
506
507 pAsm->cf_current_export_clause_ptr = NULL;
508 pAsm->cf_current_alu_clause_ptr = NULL;
509 pAsm->cf_current_tex_clause_ptr = NULL;
510 pAsm->cf_current_vtx_clause_ptr = NULL;
511 pAsm->cf_current_cf_clause_ptr = NULL;
512
513 // No clause has been created yet
514 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
515
516 pAsm->number_of_colorandz_exports = 0;
517 pAsm->number_of_exports = 0;
518 pAsm->number_of_export_opcodes = 0;
519
520 pAsm->alu_x_opcode = 0;
521
522 pAsm->D2.bits = 0;
523
524 pAsm->D.bits = 0;
525 pAsm->S[0].bits = 0;
526 pAsm->S[1].bits = 0;
527 pAsm->S[2].bits = 0;
528
529 pAsm->uLastPosUpdate = 0;
530
531 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
532
533 pAsm->uIIns = 0;
534 pAsm->uOIns = 0;
535 pAsm->number_used_registers = 0;
536 pAsm->uUsedConsts = 256;
537
538
539 // Fragment programs
540 pAsm->uBoolConsts = 0;
541 pAsm->uIntConsts = 0;
542 pAsm->uInsts = 0;
543 pAsm->uConsts = 0;
544
545 pAsm->FCSP = 0;
546 pAsm->fc_stack[0].type = FC_NONE;
547
548 pAsm->aArgSubst[0] =
549 pAsm->aArgSubst[1] =
550 pAsm->aArgSubst[2] =
551 pAsm->aArgSubst[3] = (-1);
552
553 pAsm->uOutputs = 0;
554
555 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
556 {
557 pAsm->color_export_register_number[i] = (-1);
558 }
559
560
561 pAsm->depth_export_register_number = (-1);
562 pAsm->stencil_export_register_number = (-1);
563 pAsm->coverage_to_mask_export_register_number = (-1);
564 pAsm->mask_export_register_number = (-1);
565
566 pAsm->starting_export_register_number = 0;
567 pAsm->starting_vfetch_register_number = 0;
568 pAsm->starting_temp_register_number = 0;
569 pAsm->uFirstHelpReg = 0;
570
571 pAsm->input_position_is_used = GL_FALSE;
572 pAsm->input_normal_is_used = GL_FALSE;
573
574 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
575 {
576 pAsm->input_color_is_used[ i ] = GL_FALSE;
577 }
578
579 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
580 {
581 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
582 }
583
584 for (i=0; i<VERT_ATTRIB_MAX; i++)
585 {
586 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
587 }
588
589 pAsm->number_of_inputs = 0;
590
591 pAsm->is_tex = GL_FALSE;
592 pAsm->need_tex_barrier = GL_FALSE;
593
594 pAsm->subs = NULL;
595 pAsm->unSubArraySize = 0;
596 pAsm->unSubArrayPointer = 0;
597 pAsm->callers = NULL;
598 pAsm->unCallerArraySize = 0;
599 pAsm->unCallerArrayPointer = 0;
600
601 pAsm->CALLSP = 0;
602 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
603 pAsm->CALLSTACK[0].plstCFInstructions_local
604 = &(pAsm->pR700Shader->lstCFInstructions);
605
606 pAsm->CALLSTACK[0].max = 0;
607 pAsm->CALLSTACK[0].current = 0;
608
609 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
610
611 pAsm->unCFflags = 0;
612
613 pAsm->presubs = NULL;
614 pAsm->unPresubArraySize = 0;
615 pAsm->unNumPresub = 0;
616 pAsm->unCurNumILInsts = 0;
617
618 pAsm->unVetTexBits = 0;
619
620 return 0;
621 }
622
623 GLboolean IsTex(gl_inst_opcode Opcode)
624 {
625 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
626 (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) || (OPCODE_TXL==Opcode) )
627 {
628 return GL_TRUE;
629 }
630 return GL_FALSE;
631 }
632
633 GLboolean IsAlu(gl_inst_opcode Opcode)
634 {
635 //TODO : more for fc and ex for higher spec.
636 if( IsTex(Opcode) )
637 {
638 return GL_FALSE;
639 }
640 return GL_TRUE;
641 }
642
643 int check_current_clause(r700_AssemblerBase* pAsm,
644 CF_CLAUSE_TYPE new_clause_type)
645 {
646 if (pAsm->cf_current_clause_type != new_clause_type)
647 { //Close last open clause
648 switch (pAsm->cf_current_clause_type)
649 {
650 case CF_ALU_CLAUSE:
651 if ( pAsm->cf_current_alu_clause_ptr != NULL)
652 {
653 pAsm->cf_current_alu_clause_ptr = NULL;
654 }
655 break;
656 case CF_VTX_CLAUSE:
657 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
658 {
659 pAsm->cf_current_vtx_clause_ptr = NULL;
660 }
661 break;
662 case CF_TEX_CLAUSE:
663 if ( pAsm->cf_current_tex_clause_ptr != NULL)
664 {
665 pAsm->cf_current_tex_clause_ptr = NULL;
666 }
667 break;
668 case CF_EXPORT_CLAUSE:
669 if ( pAsm->cf_current_export_clause_ptr != NULL)
670 {
671 pAsm->cf_current_export_clause_ptr = NULL;
672 }
673 break;
674 case CF_OTHER_CLAUSE:
675 if ( pAsm->cf_current_cf_clause_ptr != NULL)
676 {
677 pAsm->cf_current_cf_clause_ptr = NULL;
678 }
679 break;
680 case CF_EMPTY_CLAUSE:
681 break;
682 default:
683 radeon_error(
684 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
685 return GL_FALSE;
686 }
687
688 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
689
690 // Create new clause
691 switch (new_clause_type)
692 {
693 case CF_ALU_CLAUSE:
694 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
695 break;
696 case CF_VTX_CLAUSE:
697 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
698 break;
699 case CF_TEX_CLAUSE:
700 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
701 break;
702 case CF_EXPORT_CLAUSE:
703 {
704 R700ControlFlowSXClause* pR700ControlFlowSXClause
705 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
706
707 // Add new export instruction to control flow program
708 if (pR700ControlFlowSXClause != 0)
709 {
710 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
711 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
712 AddCFInstruction( pAsm->pR700Shader,
713 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
714 }
715 else
716 {
717 radeon_error(
718 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
719 return GL_FALSE;
720 }
721 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
722 }
723 break;
724 case CF_EMPTY_CLAUSE:
725 break;
726 case CF_OTHER_CLAUSE:
727 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
728 break;
729 default:
730 radeon_error(
731 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
732 return GL_FALSE;
733 }
734 }
735
736 return GL_TRUE;
737 }
738
739 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
740 {
741 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
742 {
743 return GL_FALSE;
744 }
745
746 pAsm->cf_current_cf_clause_ptr =
747 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
748
749 if (pAsm->cf_current_cf_clause_ptr != NULL)
750 {
751 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
752 AddCFInstruction( pAsm->pR700Shader,
753 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
754 }
755 else
756 {
757 radeon_error("Could not allocate a new VFetch CF instruction.\n");
758 return GL_FALSE;
759 }
760
761 return GL_TRUE;
762 }
763
764 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
765 R700VertexInstruction* vertex_instruction_ptr)
766 {
767 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
768 {
769 return GL_FALSE;
770 }
771
772 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
773 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
774 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
775 ) )
776 {
777 // Create new Vfetch control flow instruction for this new clause
778 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
779
780 if (pAsm->cf_current_vtx_clause_ptr != NULL)
781 {
782 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
783 AddCFInstruction( pAsm->pR700Shader,
784 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
785 }
786 else
787 {
788 radeon_error("Could not allocate a new VFetch CF instruction.\n");
789 return GL_FALSE;
790 }
791
792 if(8 == pAsm->unAsic)
793 {
794 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, EG_CF_INST_VC,
795 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
796 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
797 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
798 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
799 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
800 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
801 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
802 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
803 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
804 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
805 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
806 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
807 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
808 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 0,
809 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
810 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, 1,
811 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
812 }
813 else
814 {
815 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
816 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
817 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
818 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
819 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
820 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
821 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
822 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
823 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
824 }
825
826 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
827 }
828 else
829 {
830 if(8 == pAsm->unAsic)
831 {
832 unsigned int count = GETbits(pAsm->cf_current_vtx_clause_ptr->m_Word1.val,
833 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
834 SETfield(pAsm->cf_current_vtx_clause_ptr->m_Word1.val, count,
835 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
836 }
837 else
838 {
839 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
840 }
841 }
842
843 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
844
845 return GL_TRUE;
846 }
847
848 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
849 R700TextureInstruction* tex_instruction_ptr)
850 {
851 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
852 {
853 return GL_FALSE;
854 }
855
856 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
857 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
858 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
859 ) )
860 {
861 // new tex cf instruction for this new clause
862 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
863
864 if (pAsm->cf_current_tex_clause_ptr != NULL)
865 {
866 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
867 AddCFInstruction( pAsm->pR700Shader,
868 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
869 }
870 else
871 {
872 radeon_error("Could not allocate a new TEX CF instruction.\n");
873 return GL_FALSE;
874 }
875
876 if(8 == pAsm->unAsic)
877 {
878 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, EG_CF_INST_TC,
879 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
880 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
881 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
882 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
883 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
884 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, SQ_CF_COND_ACTIVE,
885 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
886 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
887 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
888 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
889 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
890 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
891 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
892 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
893 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
894 #ifdef FORCE_CF_TEX_BARRIER
895 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 1,
896 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
897 #else
898 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, 0,
899 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
900 #endif
901 }
902 else
903 {
904 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
905 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
906 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
907
908 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
909 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
910 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
911 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
912 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
913 }
914 }
915 else
916 {
917 if(8 == pAsm->unAsic)
918 {
919 unsigned int count = GETbits(pAsm->cf_current_tex_clause_ptr->m_Word1.val,
920 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask) + 1;
921 SETfield(pAsm->cf_current_tex_clause_ptr->m_Word1.val, count,
922 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
923 }
924 else
925 {
926 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
927 }
928 }
929
930 // If this clause constains any TEX instruction that is dependent on a
931 // previous instruction, set the barrier bit, also always set for vert
932 // programs as tex deps are not(yet) computed for them
933 if( pAsm->currentShaderType == SPT_VP || pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
934 {
935 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
936 }
937
938 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
939 {
940 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
941 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
942 }
943
944 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
945
946 return GL_TRUE;
947 }
948
949 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
950 GLuint gl_client_id,
951 GLuint destination_register,
952 GLuint number_of_elements,
953 GLenum dataElementType,
954 VTX_FETCH_METHOD* pFetchMethod)
955 {
956 GLuint client_size_inbyte;
957 GLuint data_format;
958 GLuint mega_fetch_count;
959 GLuint is_mega_fetch_flag;
960
961 R700VertexGenericFetch* vfetch_instruction_ptr;
962 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
963
964 if (assembled_vfetch_instruction_ptr == NULL)
965 {
966 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
967 if (vfetch_instruction_ptr == NULL)
968 {
969 return GL_FALSE;
970 }
971 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
972 }
973 else
974 {
975 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
976 }
977
978 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
979
980 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
981 {
982 //TODO : mini fetch
983 mega_fetch_count = 0;
984 is_mega_fetch_flag = 0;
985 }
986 else
987 {
988 mega_fetch_count = MEGA_FETCH_BYTES - 1;
989 is_mega_fetch_flag = 0x1;
990 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
991 }
992
993 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
994 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
995 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
996
997 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
998 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
999 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1000 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
1001 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
1002
1003 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
1004 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1005 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1006 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
1007
1008 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1009
1010 // Destination register
1011 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
1012 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1013
1014 vfetch_instruction_ptr->m_Word2.f.offset = 0;
1015 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1016
1017 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
1018
1019 if (assembled_vfetch_instruction_ptr == NULL)
1020 {
1021 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1022 {
1023 return GL_FALSE;
1024 }
1025
1026 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
1027 {
1028 return GL_FALSE;
1029 }
1030 else
1031 {
1032 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
1033 }
1034 }
1035
1036 return GL_TRUE;
1037 }
1038
1039 GLboolean EG_assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
1040 GLuint destination_register,
1041 GLenum type,
1042 GLint size,
1043 GLubyte element,
1044 GLuint _signed,
1045 GLboolean normalize,
1046 GLenum format,
1047 VTX_FETCH_METHOD * pFetchMethod)
1048 {
1049 GLuint client_size_inbyte;
1050 GLuint data_format;
1051 GLuint mega_fetch_count;
1052 GLuint is_mega_fetch_flag;
1053
1054 GLuint dst_sel_x, dst_sel_y, dst_sel_z, dst_sel_w;
1055
1056 R700VertexGenericFetch* vfetch_instruction_ptr;
1057 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
1058 = pAsm->vfetch_instruction_ptr_array[element];
1059
1060 if (assembled_vfetch_instruction_ptr == NULL)
1061 {
1062 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1063 if (vfetch_instruction_ptr == NULL)
1064 {
1065 return GL_FALSE;
1066 }
1067 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1068 }
1069 else
1070 {
1071 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1072 }
1073
1074 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
1075
1076 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1077 {
1078 //TODO : mini fetch
1079 mega_fetch_count = 0;
1080 is_mega_fetch_flag = 0;
1081 }
1082 else
1083 {
1084 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1085 is_mega_fetch_flag = 0x1;
1086 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1087 }
1088
1089 SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VC_INST_FETCH,
1090 EG_VTX_WORD0__VC_INST_shift,
1091 EG_VTX_WORD0__VC_INST_mask);
1092 SETfield(vfetch_instruction_ptr->m_Word0.val, EG_VTX_FETCH_VERTEX_DATA,
1093 EG_VTX_WORD0__FETCH_TYPE_shift,
1094 EG_VTX_WORD0__FETCH_TYPE_mask);
1095 CLEARbit(vfetch_instruction_ptr->m_Word0.val,
1096 EG_VTX_WORD0__FWQ_bit);
1097 SETfield(vfetch_instruction_ptr->m_Word0.val, element,
1098 EG_VTX_WORD0__BUFFER_ID_shift,
1099 EG_VTX_WORD0__BUFFER_ID_mask);
1100 SETfield(vfetch_instruction_ptr->m_Word0.val, 0x0,
1101 EG_VTX_WORD0__SRC_GPR_shift,
1102 EG_VTX_WORD0__SRC_GPR_mask);
1103 SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
1104 EG_VTX_WORD0__SRC_REL_shift,
1105 EG_VTX_WORD0__SRC_REL_bit);
1106 SETfield(vfetch_instruction_ptr->m_Word0.val, SQ_SEL_X,
1107 EG_VTX_WORD0__SRC_SEL_X_shift,
1108 EG_VTX_WORD0__SRC_SEL_X_mask);
1109 SETfield(vfetch_instruction_ptr->m_Word0.val, mega_fetch_count,
1110 EG_VTX_WORD0__MFC_shift,
1111 EG_VTX_WORD0__MFC_mask);
1112
1113 if(format == GL_BGRA)
1114 {
1115 dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1116 dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1117 dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1118 dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1119 }
1120 else
1121 {
1122 dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1123 dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1124 dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1125 dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1126
1127 }
1128 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_x,
1129 EG_VTX_WORD1__DST_SEL_X_shift,
1130 EG_VTX_WORD1__DST_SEL_X_mask);
1131 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_y,
1132 EG_VTX_WORD1__DST_SEL_Y_shift,
1133 EG_VTX_WORD1__DST_SEL_Y_mask);
1134 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_z,
1135 EG_VTX_WORD1__DST_SEL_Z_shift,
1136 EG_VTX_WORD1__DST_SEL_Z_mask);
1137 SETfield(vfetch_instruction_ptr->m_Word1.val, dst_sel_w,
1138 EG_VTX_WORD1__DST_SEL_W_shift,
1139 EG_VTX_WORD1__DST_SEL_W_mask);
1140
1141 SETfield(vfetch_instruction_ptr->m_Word1.val, 1,
1142 EG_VTX_WORD1__UCF_shift,
1143 EG_VTX_WORD1__UCF_bit);
1144 SETfield(vfetch_instruction_ptr->m_Word1.val, data_format,
1145 EG_VTX_WORD1__DATA_FORMAT_shift,
1146 EG_VTX_WORD1__DATA_FORMAT_mask);
1147 #ifdef TEST_VFETCH
1148 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1149 EG_VTX_WORD1__FCA_shift,
1150 EG_VTX_WORD1__FCA_bit);
1151 #else
1152 if(1 == _signed)
1153 {
1154 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_SIGNED,
1155 EG_VTX_WORD1__FCA_shift,
1156 EG_VTX_WORD1__FCA_bit);
1157 }
1158 else
1159 {
1160 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_FORMAT_COMP_UNSIGNED,
1161 EG_VTX_WORD1__FCA_shift,
1162 EG_VTX_WORD1__FCA_bit);
1163 }
1164 #endif /* TEST_VFETCH */
1165
1166 if(GL_TRUE == normalize)
1167 {
1168 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_NORM,
1169 EG_VTX_WORD1__NFA_shift,
1170 EG_VTX_WORD1__NFA_mask);
1171 }
1172 else
1173 {
1174 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_NUM_FORMAT_SCALED,
1175 EG_VTX_WORD1__NFA_shift,
1176 EG_VTX_WORD1__NFA_mask);
1177 }
1178
1179 /* Destination register */
1180 SETfield(vfetch_instruction_ptr->m_Word1.val, destination_register,
1181 EG_VTX_WORD1_GPR__DST_GPR_shift,
1182 EG_VTX_WORD1_GPR__DST_GPR_mask);
1183 SETfield(vfetch_instruction_ptr->m_Word1.val, SQ_ABSOLUTE,
1184 EG_VTX_WORD1_GPR__DST_REL_shift,
1185 EG_VTX_WORD1_GPR__DST_REL_bit);
1186
1187
1188 SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1189 EG_VTX_WORD2__OFFSET_shift,
1190 EG_VTX_WORD2__OFFSET_mask);
1191 SETfield(vfetch_instruction_ptr->m_Word2.val,
1192 #ifdef MESA_BIG_ENDIAN
1193 SQ_ENDIAN_8IN32,
1194 #else
1195 SQ_ENDIAN_NONE,
1196 #endif
1197 EG_VTX_WORD2__ENDIAN_SWAP_shift,
1198 EG_VTX_WORD2__ENDIAN_SWAP_mask);
1199 SETfield(vfetch_instruction_ptr->m_Word2.val, 0,
1200 EG_VTX_WORD2__CBNS_shift,
1201 EG_VTX_WORD2__CBNS_bit);
1202 SETfield(vfetch_instruction_ptr->m_Word2.val, is_mega_fetch_flag,
1203 EG_VTX_WORD2__MEGA_FETCH_shift,
1204 EG_VTX_WORD2__MEGA_FETCH_mask);
1205
1206 if (assembled_vfetch_instruction_ptr == NULL)
1207 {
1208 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1209 {
1210 return GL_FALSE;
1211 }
1212
1213 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1214 {
1215 return GL_FALSE;
1216 }
1217 else
1218 {
1219 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1220 }
1221 }
1222
1223 return GL_TRUE;
1224 }
1225
1226 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
1227 GLuint destination_register,
1228 GLenum type,
1229 GLint size,
1230 GLubyte element,
1231 GLuint _signed,
1232 GLboolean normalize,
1233 GLenum format,
1234 VTX_FETCH_METHOD * pFetchMethod)
1235 {
1236 GLuint client_size_inbyte;
1237 GLuint data_format;
1238 GLuint mega_fetch_count;
1239 GLuint is_mega_fetch_flag;
1240
1241 R700VertexGenericFetch* vfetch_instruction_ptr;
1242 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
1243 = pAsm->vfetch_instruction_ptr_array[element];
1244
1245 if (assembled_vfetch_instruction_ptr == NULL)
1246 {
1247 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
1248 if (vfetch_instruction_ptr == NULL)
1249 {
1250 return GL_FALSE;
1251 }
1252 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
1253 }
1254 else
1255 {
1256 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
1257 }
1258
1259 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
1260
1261 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
1262 {
1263 //TODO : mini fetch
1264 mega_fetch_count = 0;
1265 is_mega_fetch_flag = 0;
1266 }
1267 else
1268 {
1269 mega_fetch_count = MEGA_FETCH_BYTES - 1;
1270 is_mega_fetch_flag = 0x1;
1271 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
1272 }
1273
1274 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
1275 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
1276 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1277
1278 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
1279 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
1280 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1281 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
1282 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
1283
1284 if(format == GL_BGRA)
1285 {
1286 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
1287 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1288 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
1289 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1290 }
1291 else
1292 {
1293 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
1294 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
1295 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
1296 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
1297
1298 }
1299
1300 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
1301 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
1302 #ifdef MESA_BIG_ENDIAN
1303 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_8IN32;
1304 #else
1305 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
1306 #endif
1307
1308 if(1 == _signed)
1309 {
1310 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
1311 }
1312 else
1313 {
1314 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
1315 }
1316
1317 if(GL_TRUE == normalize)
1318 {
1319 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
1320 }
1321 else
1322 {
1323 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
1324 }
1325
1326 // Destination register
1327 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
1328 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
1329
1330 vfetch_instruction_ptr->m_Word2.f.offset = 0;
1331 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
1332
1333 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
1334
1335 if (assembled_vfetch_instruction_ptr == NULL)
1336 {
1337 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
1338 {
1339 return GL_FALSE;
1340 }
1341
1342 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
1343 {
1344 return GL_FALSE;
1345 }
1346 else
1347 {
1348 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1349 }
1350 }
1351
1352 return GL_TRUE;
1353 }
1354
1355 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
1356 {
1357 GLint i;
1358 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
1359 pAsm->cf_current_vtx_clause_ptr = NULL;
1360
1361 for (i=0; i<VERT_ATTRIB_MAX; i++)
1362 {
1363 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1364 }
1365
1366 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
1367
1368 return GL_TRUE;
1369 }
1370
1371 GLuint gethelpr(r700_AssemblerBase* pAsm)
1372 {
1373 GLuint r = pAsm->uHelpReg;
1374 pAsm->uHelpReg++;
1375 if (pAsm->uHelpReg > pAsm->number_used_registers)
1376 {
1377 pAsm->number_used_registers = pAsm->uHelpReg;
1378 }
1379 return r;
1380 }
1381 void resethelpr(r700_AssemblerBase* pAsm)
1382 {
1383 pAsm->uHelpReg = pAsm->uFirstHelpReg;
1384 }
1385
1386 void checkop_init(r700_AssemblerBase* pAsm)
1387 {
1388 resethelpr(pAsm);
1389 pAsm->aArgSubst[0] =
1390 pAsm->aArgSubst[1] =
1391 pAsm->aArgSubst[2] =
1392 pAsm->aArgSubst[3] = -1;
1393 }
1394
1395 static GLboolean next_ins(r700_AssemblerBase *pAsm)
1396 {
1397 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1398
1399 if (GL_TRUE == pAsm->is_tex)
1400 {
1401 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX)
1402 {
1403 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE))
1404 {
1405 radeon_error("Error assembling TEX instruction\n");
1406 return GL_FALSE;
1407 }
1408 }
1409 else
1410 {
1411 if (GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE))
1412 {
1413 radeon_error("Error assembling TEX instruction\n");
1414 return GL_FALSE;
1415 }
1416 }
1417 }
1418 else
1419 { //ALU
1420 if (GL_FALSE == assemble_alu_instruction(pAsm))
1421 {
1422 radeon_error("Error assembling ALU instruction\n");
1423 return GL_FALSE;
1424 }
1425 }
1426
1427 if (pAsm->D.dst.rtype == DST_REG_OUT)
1428 {
1429 assert(pAsm->D.dst.reg >= pAsm->starting_export_register_number);
1430 }
1431
1432 //reset for next inst.
1433 pAsm->D.bits = 0;
1434 pAsm->D2.bits = 0;
1435 pAsm->S[0].bits = 0;
1436 pAsm->S[1].bits = 0;
1437 pAsm->S[2].bits = 0;
1438 pAsm->is_tex = GL_FALSE;
1439 pAsm->need_tex_barrier = GL_FALSE;
1440 pAsm->D2.bits = 0;
1441 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
1442 return GL_TRUE;
1443 }
1444
1445 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1446 {
1447 GLuint tmp = gethelpr(pAsm);
1448
1449 //mov src to temp helper gpr.
1450 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1451
1452 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1453
1454 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1455 pAsm->D.dst.reg = tmp;
1456
1457 nomask_PVSDST(&(pAsm->D.dst));
1458
1459 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1460 {
1461 return GL_FALSE;
1462 }
1463
1464 noswizzle_PVSSRC(&(pAsm->S[0].src));
1465 noneg_PVSSRC(&(pAsm->S[0].src));
1466
1467 if( GL_FALSE == next_ins(pAsm) )
1468 {
1469 return GL_FALSE;
1470 }
1471
1472 pAsm->aArgSubst[1 + src] = tmp;
1473
1474 return GL_TRUE;
1475 }
1476
1477 GLboolean checkop1(r700_AssemblerBase* pAsm)
1478 {
1479 checkop_init(pAsm);
1480 return GL_TRUE;
1481 }
1482
1483 GLboolean checkop2(r700_AssemblerBase* pAsm)
1484 {
1485 GLboolean bSrcConst[2];
1486 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1487
1488 checkop_init(pAsm);
1489
1490 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1491 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1492 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1493 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1494 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1495 {
1496 bSrcConst[0] = GL_TRUE;
1497 }
1498 else
1499 {
1500 bSrcConst[0] = GL_FALSE;
1501 }
1502 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1503 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1504 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1505 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1506 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1507 {
1508 bSrcConst[1] = GL_TRUE;
1509 }
1510 else
1511 {
1512 bSrcConst[1] = GL_FALSE;
1513 }
1514
1515 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1516 {
1517 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1518 {
1519 if( GL_FALSE == mov_temp(pAsm, 1) )
1520 {
1521 return GL_FALSE;
1522 }
1523 }
1524 }
1525
1526 return GL_TRUE;
1527 }
1528
1529 GLboolean checkop3(r700_AssemblerBase* pAsm)
1530 {
1531 GLboolean bSrcConst[3];
1532 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1533
1534 checkop_init(pAsm);
1535
1536 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1537 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1538 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1539 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1540 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1541 {
1542 bSrcConst[0] = GL_TRUE;
1543 }
1544 else
1545 {
1546 bSrcConst[0] = GL_FALSE;
1547 }
1548 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1549 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1550 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1551 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1552 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1553 {
1554 bSrcConst[1] = GL_TRUE;
1555 }
1556 else
1557 {
1558 bSrcConst[1] = GL_FALSE;
1559 }
1560 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1561 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1562 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1563 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1564 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1565 {
1566 bSrcConst[2] = GL_TRUE;
1567 }
1568 else
1569 {
1570 bSrcConst[2] = GL_FALSE;
1571 }
1572
1573 if( (GL_TRUE == bSrcConst[0]) &&
1574 (GL_TRUE == bSrcConst[1]) &&
1575 (GL_TRUE == bSrcConst[2]) )
1576 {
1577 if( GL_FALSE == mov_temp(pAsm, 1) )
1578 {
1579 return GL_FALSE;
1580 }
1581 if( GL_FALSE == mov_temp(pAsm, 2) )
1582 {
1583 return GL_FALSE;
1584 }
1585
1586 return GL_TRUE;
1587 }
1588 else if( (GL_TRUE == bSrcConst[0]) &&
1589 (GL_TRUE == bSrcConst[1]) )
1590 {
1591 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1592 {
1593 if( GL_FALSE == mov_temp(pAsm, 1) )
1594 {
1595 return GL_FALSE;
1596 }
1597 }
1598
1599 return GL_TRUE;
1600 }
1601 else if ( (GL_TRUE == bSrcConst[0]) &&
1602 (GL_TRUE == bSrcConst[2]) )
1603 {
1604 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1605 {
1606 if( GL_FALSE == mov_temp(pAsm, 2) )
1607 {
1608 return GL_FALSE;
1609 }
1610 }
1611
1612 return GL_TRUE;
1613 }
1614 else if( (GL_TRUE == bSrcConst[1]) &&
1615 (GL_TRUE == bSrcConst[2]) )
1616 {
1617 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1618 {
1619 if( GL_FALSE == mov_temp(pAsm, 2) )
1620 {
1621 return GL_FALSE;
1622 }
1623 }
1624
1625 return GL_TRUE;
1626 }
1627
1628 return GL_TRUE;
1629 }
1630
1631 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1632 int src,
1633 int fld)
1634 {
1635 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1636
1637 if (fld == -1)
1638 {
1639 fld = src;
1640 }
1641
1642 if(pAsm->aArgSubst[1+src] >= 0)
1643 {
1644 assert(fld >= 0);
1645 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1646 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1647 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1648 }
1649 else
1650 {
1651 if (1 == pILInst->SrcReg[src].RelAddr)
1652 {
1653 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1654 }
1655 else
1656 {
1657 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1658 }
1659 switch (pILInst->SrcReg[src].File)
1660 {
1661 case PROGRAM_TEMPORARY:
1662 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1663 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1664 break;
1665 case PROGRAM_CONSTANT:
1666 case PROGRAM_LOCAL_PARAM:
1667 case PROGRAM_ENV_PARAM:
1668 case PROGRAM_STATE_VAR:
1669 case PROGRAM_UNIFORM:
1670 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1671 if(pILInst->SrcReg[src].Index < 0)
1672 {
1673 WARN_ONCE("Negative register offsets not supported yet!\n");
1674 pAsm->S[fld].src.reg = 0;
1675 }
1676 else
1677 {
1678 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1679 }
1680 break;
1681 case PROGRAM_INPUT:
1682 pAsm->S[fld].src.rtype = SRC_REG_GPR;
1683 switch (pAsm->currentShaderType)
1684 {
1685 case SPT_FP:
1686 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1687 break;
1688 case SPT_VP:
1689 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1690 break;
1691 }
1692 break;
1693 case PROGRAM_OUTPUT:
1694 pAsm->S[fld].src.rtype = SRC_REG_GPR;
1695 switch (pAsm->currentShaderType)
1696 {
1697 case SPT_FP:
1698 pAsm->S[fld].src.reg = pAsm->uiFP_OutputMap[pILInst->SrcReg[src].Index];
1699 break;
1700 case SPT_VP:
1701 pAsm->S[fld].src.reg = pAsm->ucVP_OutputMap[pILInst->SrcReg[src].Index];
1702 break;
1703 }
1704 break;
1705 default:
1706 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1707 return GL_FALSE;
1708 }
1709 }
1710
1711 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1712 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1713 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1714 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1715
1716 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1717 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1718 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1719 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1720
1721 return GL_TRUE;
1722 }
1723
1724 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1725 {
1726 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1727 switch (pILInst->DstReg.File)
1728 {
1729 case PROGRAM_TEMPORARY:
1730 if (1 == pILInst->DstReg.RelAddr)
1731 {
1732 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1733 }
1734 else
1735 {
1736 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1737 }
1738 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1739 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1740 break;
1741 case PROGRAM_ADDRESS:
1742 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1743 pAsm->D.dst.rtype = DST_REG_A0;
1744 pAsm->D.dst.reg = 0;
1745 break;
1746 case PROGRAM_OUTPUT:
1747 if (1 == pILInst->DstReg.RelAddr)
1748 {
1749 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_RELATIVE_A0);
1750 }
1751 else
1752 {
1753 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1754 }
1755 pAsm->D.dst.rtype = DST_REG_OUT;
1756 switch (pAsm->currentShaderType)
1757 {
1758 case SPT_FP:
1759 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1760 break;
1761 case SPT_VP:
1762 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1763 break;
1764 }
1765 break;
1766 default:
1767 radeon_error("Invalid destination output argument type\n");
1768 return GL_FALSE;
1769 }
1770
1771 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1772 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1773 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1774 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1775
1776 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1777 {
1778 pAsm->D2.dst2.SaturateMode = 1;
1779 }
1780 else
1781 {
1782 pAsm->D2.dst2.SaturateMode = 0;
1783 }
1784
1785 return GL_TRUE;
1786 }
1787
1788 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1789 {
1790 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1791
1792 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1793 {
1794 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1795 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1796
1797 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1798 }
1799 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1800 {
1801 pAsm->D.dst.rtype = DST_REG_OUT;
1802 switch (pAsm->currentShaderType)
1803 {
1804 case SPT_FP:
1805 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1806 break;
1807 case SPT_VP:
1808 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1809 break;
1810 }
1811
1812 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1813 }
1814 else
1815 {
1816 radeon_error("Invalid destination output argument type\n");
1817 return GL_FALSE;
1818 }
1819
1820 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1821 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1822 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1823 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1824
1825 return GL_TRUE;
1826 }
1827
1828 GLboolean tex_src(r700_AssemblerBase *pAsm)
1829 {
1830 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1831
1832 GLboolean bValidTexCoord = GL_FALSE;
1833
1834 if(pAsm->aArgSubst[1] >= 0)
1835 {
1836 bValidTexCoord = GL_TRUE;
1837 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1838 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1839 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1840 }
1841 else
1842 {
1843 switch (pILInst->SrcReg[0].File) {
1844 case PROGRAM_UNIFORM:
1845 case PROGRAM_CONSTANT:
1846 case PROGRAM_LOCAL_PARAM:
1847 case PROGRAM_ENV_PARAM:
1848 case PROGRAM_STATE_VAR:
1849 break;
1850 case PROGRAM_TEMPORARY:
1851 bValidTexCoord = GL_TRUE;
1852 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1853 pAsm->starting_temp_register_number;
1854 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1855 break;
1856 case PROGRAM_INPUT:
1857 if(SPT_VP == pAsm->currentShaderType)
1858 {
1859 switch (pILInst->SrcReg[0].Index)
1860 {
1861 case VERT_ATTRIB_TEX0:
1862 case VERT_ATTRIB_TEX1:
1863 case VERT_ATTRIB_TEX2:
1864 case VERT_ATTRIB_TEX3:
1865 case VERT_ATTRIB_TEX4:
1866 case VERT_ATTRIB_TEX5:
1867 case VERT_ATTRIB_TEX6:
1868 case VERT_ATTRIB_TEX7:
1869 bValidTexCoord = GL_TRUE;
1870 pAsm->S[0].src.reg =
1871 pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1872 pAsm->S[0].src.rtype = SRC_REG_GPR;
1873 break;
1874 }
1875 }
1876 else
1877 {
1878 switch (pILInst->SrcReg[0].Index)
1879 {
1880 case FRAG_ATTRIB_WPOS:
1881 case FRAG_ATTRIB_COL0:
1882 case FRAG_ATTRIB_COL1:
1883 case FRAG_ATTRIB_FOGC:
1884 case FRAG_ATTRIB_TEX0:
1885 case FRAG_ATTRIB_TEX1:
1886 case FRAG_ATTRIB_TEX2:
1887 case FRAG_ATTRIB_TEX3:
1888 case FRAG_ATTRIB_TEX4:
1889 case FRAG_ATTRIB_TEX5:
1890 case FRAG_ATTRIB_TEX6:
1891 case FRAG_ATTRIB_TEX7:
1892 bValidTexCoord = GL_TRUE;
1893 pAsm->S[0].src.reg =
1894 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1895 pAsm->S[0].src.rtype = SRC_REG_GPR;
1896 break;
1897 case FRAG_ATTRIB_FACE:
1898 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1899 break;
1900 case FRAG_ATTRIB_PNTC:
1901 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1902 break;
1903 }
1904
1905 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1906 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1907 {
1908 bValidTexCoord = GL_TRUE;
1909 pAsm->S[0].src.reg =
1910 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1911 pAsm->S[0].src.rtype = SRC_REG_GPR;
1912 }
1913 }
1914
1915 break;
1916 }
1917 }
1918
1919 if(GL_TRUE == bValidTexCoord)
1920 {
1921 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1922 }
1923 else
1924 {
1925 radeon_error("Invalid source texcoord for TEX instruction\n");
1926 return GL_FALSE;
1927 }
1928
1929 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1930 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1931 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1932 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1933
1934 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1935 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1936 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1937 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1938
1939 return GL_TRUE;
1940 }
1941
1942 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1943 {
1944 PVSSRC * texture_coordinate_source;
1945 PVSSRC * texture_unit_source;
1946
1947 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1948 if (tex_instruction_ptr == NULL)
1949 {
1950 return GL_FALSE;
1951 }
1952 Init_R700TextureInstruction(tex_instruction_ptr);
1953
1954 texture_coordinate_source = &(pAsm->S[0].src);
1955 texture_unit_source = &(pAsm->S[1].src);
1956
1957 if(8 == pAsm->unAsic) /* evergreen */
1958 {
1959
1960 SETfield(tex_instruction_ptr->m_Word0.val, pAsm->D.dst.opcode,
1961 EG_TEX_WORD0__TEX_INST_shift,
1962 EG_TEX_WORD0__TEX_INST_mask);
1963
1964 if( (SQ_TEX_INST_GET_GRADIENTS_H == pAsm->D.dst.opcode)
1965 ||(SQ_TEX_INST_GET_GRADIENTS_V == pAsm->D.dst.opcode) )
1966 {
1967 /* Use fine texel derivative calculation rather than use quad derivative */
1968 SETfield(tex_instruction_ptr->m_Word0.val, 1,
1969 EG_TEX_WORD0__INST_MOD_shift,
1970 EG_TEX_WORD0__INST_MOD_mask);
1971 }
1972 else
1973 {
1974 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1975 EG_TEX_WORD0__INST_MOD_shift,
1976 EG_TEX_WORD0__INST_MOD_mask);
1977 }
1978
1979 CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__FWQ_bit);
1980
1981 if(SPT_VP == pAsm->currentShaderType)
1982 {
1983 SETfield(tex_instruction_ptr->m_Word0.val, (texture_unit_source->reg + VERT_ATTRIB_MAX),
1984 EG_TEX_WORD0__RESOURCE_ID_shift,
1985 EG_TEX_WORD0__RESOURCE_ID_mask);
1986 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1987 }
1988 else
1989 {
1990 SETfield(tex_instruction_ptr->m_Word0.val, texture_unit_source->reg,
1991 EG_TEX_WORD0__RESOURCE_ID_shift,
1992 EG_TEX_WORD0__RESOURCE_ID_mask);
1993 }
1994
1995 CLEARbit(tex_instruction_ptr->m_Word0.val, EG_TEX_WORD0__ALT_CONST_bit);
1996 SETfield(tex_instruction_ptr->m_Word0.val, 0,
1997 EG_TEX_WORD0__RIM_shift,
1998 EG_TEX_WORD0__RIM_mask);
1999 SETfield(tex_instruction_ptr->m_Word0.val, 0,
2000 EG_TEX_WORD0__SIM_shift,
2001 EG_TEX_WORD0__SIM_mask);
2002 }
2003 else
2004 {
2005 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
2006 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
2007 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
2008 tex_instruction_ptr->m_Word0.f.alt_const = 0;
2009
2010 if(SPT_VP == pAsm->currentShaderType)
2011 {
2012 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
2013 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
2014 }
2015 else
2016 {
2017 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
2018 }
2019 }
2020
2021 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
2022 if (normalized) {
2023 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
2024 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
2025 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
2026 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
2027 } else {
2028 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
2029 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
2030 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
2031 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
2032 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
2033 }
2034
2035 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
2036 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
2037 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
2038 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
2039
2040 // dst
2041 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2042 (pAsm->D.dst.rtype == DST_REG_OUT) )
2043 {
2044 if(8 == pAsm->unAsic) /* evergreen */
2045 {
2046 SETfield(tex_instruction_ptr->m_Word0.val, texture_coordinate_source->reg,
2047 EG_TEX_WORD0__SRC_GPR_shift,
2048 EG_TEX_WORD0__SRC_GPR_mask);
2049 SETfield(tex_instruction_ptr->m_Word0.val, SQ_ABSOLUTE,
2050 EG_TEX_WORD0__SRC_REL_shift,
2051 EG_TEX_WORD0__SRC_REL_bit);
2052 }
2053 else
2054 {
2055 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
2056 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
2057 }
2058
2059 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2060 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
2061
2062 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
2063 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
2064 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
2065 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
2066
2067
2068 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
2069 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
2070 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
2071 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
2072 }
2073 else
2074 {
2075 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
2076 return GL_FALSE;
2077 }
2078
2079 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
2080 {
2081 return GL_FALSE;
2082 }
2083
2084 return GL_TRUE;
2085 }
2086
2087 void initialize(r700_AssemblerBase *pAsm)
2088 {
2089 GLuint cycle, component;
2090
2091 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
2092 {
2093 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
2094 {
2095 pAsm->hw_gpr[cycle][component] = (-1);
2096 }
2097 }
2098 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
2099 {
2100 pAsm->hw_cfile_addr[component] = (-1);
2101 pAsm->hw_cfile_chan[component] = (-1);
2102 }
2103 }
2104
2105 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
2106 int source_index,
2107 PVSSRC* pSource,
2108 BITS scalar_channel_index,
2109 r700_AssemblerBase *pAsm)
2110 {
2111 BITS src_sel;
2112 BITS src_rel;
2113 BITS src_chan;
2114 BITS src_neg;
2115
2116 //--------------------------------------------------------------------------
2117 // Source for operands src0, src1.
2118 // Values [0,127] correspond to GPR[0..127].
2119 // Values [256,511] correspond to cfile constants c[0..255].
2120
2121 //--------------------------------------------------------------------------
2122 // Other special values are shown in the list below.
2123
2124 // 248 SQ_ALU_SRC_0: special constant 0.0.
2125 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
2126
2127 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
2128 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
2129
2130 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
2131 // 253 SQ_ALU_SRC_LITERAL: literal constant.
2132
2133 // 254 SQ_ALU_SRC_PV: previous vector result.
2134 // 255 SQ_ALU_SRC_PS: previous scalar result.
2135 //--------------------------------------------------------------------------
2136
2137 BITS channel_swizzle;
2138 switch (scalar_channel_index)
2139 {
2140 case 0: channel_swizzle = pSource->swizzlex; break;
2141 case 1: channel_swizzle = pSource->swizzley; break;
2142 case 2: channel_swizzle = pSource->swizzlez; break;
2143 case 3: channel_swizzle = pSource->swizzlew; break;
2144 default: channel_swizzle = SQ_SEL_MASK; break;
2145 }
2146
2147 if(channel_swizzle == SQ_SEL_0)
2148 {
2149 src_sel = SQ_ALU_SRC_0;
2150 }
2151 else if (channel_swizzle == SQ_SEL_1)
2152 {
2153 src_sel = SQ_ALU_SRC_1;
2154 }
2155 else
2156 {
2157 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
2158 (pSource->rtype == SRC_REG_GPR)
2159 )
2160 {
2161 src_sel = pSource->reg;
2162 }
2163 else if (pSource->rtype == SRC_REG_CONSTANT)
2164 {
2165 /* TODO : 4 const buffers */
2166 if(GL_TRUE == pAsm->bUseMemConstant)
2167 {
2168 src_sel = pSource->reg + SQ_ALU_SRC_KCACHE0_BASE;
2169 pAsm->kcacheUsed = SQ_ALU_SRC_KCACHE0_BASE;
2170 }
2171 else
2172 {
2173 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
2174 }
2175 }
2176 else if (pSource->rtype == SRC_REC_LITERAL)
2177 {
2178 src_sel = SQ_ALU_SRC_LITERAL;
2179 }
2180 else
2181 {
2182 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
2183 source_index, pSource->rtype);
2184 return GL_FALSE;
2185 }
2186 }
2187
2188 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
2189 {
2190 src_rel = SQ_ABSOLUTE;
2191 }
2192 else
2193 {
2194 src_rel = SQ_RELATIVE;
2195 }
2196
2197 switch (channel_swizzle)
2198 {
2199 case SQ_SEL_X:
2200 src_chan = SQ_CHAN_X;
2201 break;
2202 case SQ_SEL_Y:
2203 src_chan = SQ_CHAN_Y;
2204 break;
2205 case SQ_SEL_Z:
2206 src_chan = SQ_CHAN_Z;
2207 break;
2208 case SQ_SEL_W:
2209 src_chan = SQ_CHAN_W;
2210 break;
2211 case SQ_SEL_0:
2212 case SQ_SEL_1:
2213 // Does not matter since src_sel controls
2214 src_chan = SQ_CHAN_X;
2215 break;
2216 default:
2217 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
2218 return GL_FALSE;
2219 break;
2220 }
2221
2222 switch (scalar_channel_index)
2223 {
2224 case 0: src_neg = pSource->negx; break;
2225 case 1: src_neg = pSource->negy; break;
2226 case 2: src_neg = pSource->negz; break;
2227 case 3: src_neg = pSource->negw; break;
2228 default: src_neg = 0; break;
2229 }
2230
2231 switch (source_index)
2232 {
2233 case 0:
2234 assert(alu_instruction_ptr);
2235 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
2236 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
2237 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
2238 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
2239 break;
2240 case 1:
2241 assert(alu_instruction_ptr);
2242 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
2243 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
2244 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
2245 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
2246 break;
2247 case 2:
2248 assert(alu_instruction_ptr);
2249 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
2250 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
2251 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
2252 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
2253 break;
2254 default:
2255 radeon_error("Only three sources allowed in ALU opcodes.\n");
2256 return GL_FALSE;
2257 break;
2258 }
2259
2260 return GL_TRUE;
2261 }
2262
2263 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
2264 R700ALUInstruction* alu_instruction_ptr,
2265 GLuint contiguous_slots_needed)
2266 {
2267 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
2268 {
2269 return GL_FALSE;
2270 }
2271
2272 if ( pAsm->alu_x_opcode != 0 ||
2273 pAsm->cf_current_alu_clause_ptr == NULL ||
2274 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
2275 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
2276 ) )
2277 {
2278
2279 //new cf inst for this clause
2280 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
2281
2282 // link the new cf to cf segment
2283 if(NULL != pAsm->cf_current_alu_clause_ptr)
2284 {
2285 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
2286 AddCFInstruction( pAsm->pR700Shader,
2287 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
2288 }
2289 else
2290 {
2291 radeon_error("Could not allocate a new ALU CF instruction.\n");
2292 return GL_FALSE;
2293 }
2294
2295 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2296 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2297 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
2298
2299 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2300 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2301 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2302
2303 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
2304
2305 if(pAsm->alu_x_opcode != 0)
2306 {
2307 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
2308 pAsm->alu_x_opcode = 0;
2309 }
2310 else
2311 {
2312 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
2313 }
2314
2315 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
2316
2317 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
2318 }
2319 else
2320 {
2321 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
2322 }
2323
2324 /* TODO : handle 4 bufs */
2325 if( (pAsm->kcacheUsed > 0) && (GL_TRUE == pAsm->bUseMemConstant) )
2326 {
2327 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
2328 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
2329 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_LOCK_2;
2330 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
2331 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
2332 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
2333 }
2334
2335 // If this clause constains any instruction that is forward dependent on a TEX instruction,
2336 // set the whole_quad_mode for this clause
2337 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
2338 {
2339 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
2340 }
2341
2342 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
2343 {
2344 alu_instruction_ptr->m_Word0.f.last = 1;
2345 }
2346
2347 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
2348 {
2349 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
2350 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
2351 }
2352
2353 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
2354
2355 return GL_TRUE;
2356 }
2357
2358 GLboolean EG_add_ps_interp(r700_AssemblerBase* pAsm)
2359 {
2360 R700ALUInstruction * alu_instruction_ptr = NULL;
2361
2362 int ui;
2363 unsigned int uj;
2364 unsigned int unWord0Temp = 0x380C00;
2365 unsigned int unWord1Temp = 0x146B10; //SQ_SEL_X
2366
2367 if(pAsm->uIIns > 0)
2368 {
2369 for(ui=(pAsm->uIIns-1); ui>=0; ui--)
2370 {
2371 for(uj=0; uj<8; uj++)
2372 {
2373 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2374 Init_R700ALUInstruction(alu_instruction_ptr);
2375 alu_instruction_ptr->m_Word0.val = unWord0Temp;
2376 alu_instruction_ptr->m_Word1.val = unWord1Temp;
2377
2378 if(uj < 4)
2379 {
2380 SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_ZW,
2381 EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2382 }
2383 else
2384 {
2385 SETfield(alu_instruction_ptr->m_Word1.val, EG_OP2_INST_INTERP_XY,
2386 EG_ALU_WORD1_OP2__ALU_INST_shift, EG_ALU_WORD1_OP2__ALU_INST_mask);
2387 }
2388 if( (uj > 1) && (uj < 6) )
2389 {
2390 SETfield(alu_instruction_ptr->m_Word1.val, 1,
2391 EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2392 }
2393 else
2394 {
2395 SETfield(alu_instruction_ptr->m_Word1.val, 0,
2396 EG_ALU_WORD1_OP2__WRITE_MASK_shift, EG_ALU_WORD1_OP2__WRITE_MASK_bit);
2397 }
2398 if( (uj > 1) && (uj < 6) )
2399 {
2400 SETfield(alu_instruction_ptr->m_Word1.val, ui,
2401 EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2402 }
2403 else
2404 {
2405 SETfield(alu_instruction_ptr->m_Word1.val, 111,
2406 EG_ALU_WORD1__DST_GPR_shift, EG_ALU_WORD1__DST_GPR_mask);
2407 }
2408
2409 SETfield(alu_instruction_ptr->m_Word1.val, (uj % 4),
2410 EG_ALU_WORD1__DST_CHAN_shift, EG_ALU_WORD1__DST_CHAN_mask);
2411 SETfield(alu_instruction_ptr->m_Word0.val, (1 - (uj % 2)),
2412 EG_ALU_WORD0__SRC0_CHAN_shift, EG_ALU_WORD0__SRC0_CHAN_mask);
2413 SETfield(alu_instruction_ptr->m_Word0.val, (EG_ALU_SRC_PARAM_BASE + ui),
2414 EG_ALU_WORD0__SRC1_SEL_shift, EG_ALU_WORD0__SRC1_SEL_mask);
2415 if(3 == (uj % 4))
2416 {
2417 SETfield(alu_instruction_ptr->m_Word0.val, 1,
2418 EG_ALU_WORD0__LAST_shift, EG_ALU_WORD0__LAST_bit);
2419 }
2420
2421 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, 4) )
2422 {
2423 return GL_FALSE;
2424 }
2425 }
2426 }
2427 }
2428
2429 return GL_TRUE;
2430 }
2431
2432 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
2433 int source_index,
2434 BITS* psrc_sel,
2435 BITS* psrc_rel,
2436 BITS* psrc_chan,
2437 BITS* psrc_neg)
2438 {
2439 switch (source_index)
2440 {
2441 case 0:
2442 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
2443 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
2444 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
2445 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
2446 break;
2447
2448 case 1:
2449 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
2450 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
2451 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
2452 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
2453 break;
2454
2455 case 2:
2456 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
2457 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
2458 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
2459 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
2460 break;
2461 }
2462 }
2463
2464 int is_cfile(BITS sel)
2465 {
2466 if (sel > 255 && sel < 512)
2467 {
2468 return 1;
2469 }
2470 return 0;
2471 }
2472
2473 int is_const(BITS sel)
2474 {
2475 if (is_cfile(sel))
2476 {
2477 return 1;
2478 }
2479 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
2480 {
2481 return 1;
2482 }
2483 return 0;
2484 }
2485
2486 int is_gpr(BITS sel)
2487 {
2488 if (sel >= 0 && sel < 128)
2489 {
2490 return 1;
2491 }
2492 return 0;
2493 }
2494
2495 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
2496 SQ_ALU_VEC_120, //001
2497 SQ_ALU_VEC_102, //010
2498
2499 SQ_ALU_VEC_201, //011
2500 SQ_ALU_VEC_012, //100
2501 SQ_ALU_VEC_021, //101
2502
2503 SQ_ALU_VEC_012, //110
2504 SQ_ALU_VEC_012}; //111
2505
2506 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
2507 SQ_ALU_SCL_122, //001
2508 SQ_ALU_SCL_122, //010
2509
2510 SQ_ALU_SCL_221, //011
2511 SQ_ALU_SCL_212, //100
2512 SQ_ALU_SCL_122, //101
2513
2514 SQ_ALU_SCL_122, //110
2515 SQ_ALU_SCL_122}; //111
2516
2517 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
2518 GLuint sel,
2519 GLuint chan)
2520 {
2521 int res_match = (-1);
2522 int res_empty = (-1);
2523
2524 GLint res;
2525
2526 for (res=3; res>=0; res--)
2527 {
2528 if(pAsm->hw_cfile_addr[ res] < 0)
2529 {
2530 res_empty = res;
2531 }
2532 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
2533 &&
2534 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
2535 {
2536 res_match = res;
2537 }
2538 }
2539
2540 if(res_match >= 0)
2541 {
2542 // Read for this scalar component already reserved, nothing to do here.
2543 ;
2544 }
2545 else if(res_empty >= 0)
2546 {
2547 pAsm->hw_cfile_addr[ res_empty ] = sel;
2548 pAsm->hw_cfile_chan[ res_empty ] = chan;
2549 }
2550 else
2551 {
2552 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
2553 return GL_FALSE;
2554 }
2555 return GL_TRUE;
2556 }
2557
2558 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
2559 {
2560 if(pAsm->hw_gpr[cycle][chan] < 0)
2561 {
2562 pAsm->hw_gpr[cycle][chan] = sel;
2563 }
2564 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
2565 {
2566 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
2567 return GL_FALSE;
2568 }
2569
2570 return GL_TRUE;
2571 }
2572
2573 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2574 {
2575 switch (swiz)
2576 {
2577 case SQ_ALU_SCL_210:
2578 {
2579 int table[3] = {2, 1, 0};
2580 *pCycle = table[sel];
2581 return GL_TRUE;
2582 }
2583 break;
2584 case SQ_ALU_SCL_122:
2585 {
2586 int table[3] = {1, 2, 2};
2587 *pCycle = table[sel];
2588 return GL_TRUE;
2589 }
2590 break;
2591 case SQ_ALU_SCL_212:
2592 {
2593 int table[3] = {2, 1, 2};
2594 *pCycle = table[sel];
2595 return GL_TRUE;
2596 }
2597 break;
2598 case SQ_ALU_SCL_221:
2599 {
2600 int table[3] = {2, 2, 1};
2601 *pCycle = table[sel];
2602 return GL_TRUE;
2603 }
2604 break;
2605 default:
2606 radeon_error("Bad Scalar bank swizzle value\n");
2607 break;
2608 }
2609
2610 return GL_FALSE;
2611 }
2612
2613 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2614 {
2615 switch (swiz)
2616 {
2617 case SQ_ALU_VEC_012:
2618 {
2619 int table[3] = {0, 1, 2};
2620 *pCycle = table[sel];
2621 }
2622 break;
2623 case SQ_ALU_VEC_021:
2624 {
2625 int table[3] = {0, 2, 1};
2626 *pCycle = table[sel];
2627 }
2628 break;
2629 case SQ_ALU_VEC_120:
2630 {
2631 int table[3] = {1, 2, 0};
2632 *pCycle = table[sel];
2633 }
2634 break;
2635 case SQ_ALU_VEC_102:
2636 {
2637 int table[3] = {1, 0, 2};
2638 *pCycle = table[sel];
2639 }
2640 break;
2641 case SQ_ALU_VEC_201:
2642 {
2643 int table[3] = {2, 0, 1};
2644 *pCycle = table[sel];
2645 }
2646 break;
2647 case SQ_ALU_VEC_210:
2648 {
2649 int table[3] = {2, 1, 0};
2650 *pCycle = table[sel];
2651 }
2652 break;
2653 default:
2654 radeon_error("Bad Vec bank swizzle value\n");
2655 return GL_FALSE;
2656 break;
2657 }
2658
2659 return GL_TRUE;
2660 }
2661
2662 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2663 R700ALUInstruction* alu_instruction_ptr)
2664 {
2665 GLuint cycle;
2666 GLuint bank_swizzle;
2667 GLuint const_count = 0;
2668
2669 BITS sel;
2670 BITS chan;
2671 BITS rel;
2672 BITS neg;
2673
2674 GLuint src;
2675
2676 BITS src_sel [3] = {0,0,0};
2677 BITS src_chan[3] = {0,0,0};
2678 BITS src_rel [3] = {0,0,0};
2679 BITS src_neg [3] = {0,0,0};
2680
2681 GLuint swizzle_key;
2682 GLuint number_of_operands;
2683
2684 if(8 == pAsm->unAsic)
2685 {
2686 number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2687 }
2688 else
2689 {
2690 number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2691 }
2692
2693 for (src=0; src<number_of_operands; src++)
2694 {
2695 get_src_properties(alu_instruction_ptr,
2696 src,
2697 &(src_sel[src]),
2698 &(src_rel[src]),
2699 &(src_chan[src]),
2700 &(src_neg[src]) );
2701 }
2702
2703
2704 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2705 (is_const( src_sel[1] ) ? 2 : 0) +
2706 (is_const( src_sel[2] ) ? 1 : 0) );
2707
2708 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2709
2710 for (src=0; src<number_of_operands; src++)
2711 {
2712 sel = src_sel [src];
2713 chan = src_chan[src];
2714 rel = src_rel [src];
2715 neg = src_neg [src];
2716
2717 if (is_const( sel ))
2718 {
2719 // Any constant, including literal and inline constants
2720 const_count++;
2721
2722 if (is_cfile( sel ))
2723 {
2724 reserve_cfile(pAsm, sel, chan);
2725 }
2726
2727 }
2728 }
2729
2730 for (src=0; src<number_of_operands; src++)
2731 {
2732 sel = src_sel [src];
2733 chan = src_chan[src];
2734 rel = src_rel [src];
2735 neg = src_neg [src];
2736
2737 if( is_gpr(sel) )
2738 {
2739 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2740
2741 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2742 {
2743 return GL_FALSE;
2744 }
2745
2746 if(cycle < const_count)
2747 {
2748 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2749 {
2750 return GL_FALSE;
2751 }
2752 }
2753 }
2754 }
2755
2756 return GL_TRUE;
2757 }
2758
2759 GLboolean check_vector(r700_AssemblerBase* pAsm,
2760 R700ALUInstruction* alu_instruction_ptr)
2761 {
2762 GLuint cycle;
2763 GLuint bank_swizzle;
2764 GLuint const_count = 0;
2765
2766 GLuint src;
2767
2768 BITS sel;
2769 BITS chan;
2770 BITS rel;
2771 BITS neg;
2772
2773 BITS src_sel [3] = {0,0,0};
2774 BITS src_chan[3] = {0,0,0};
2775 BITS src_rel [3] = {0,0,0};
2776 BITS src_neg [3] = {0,0,0};
2777
2778 GLuint swizzle_key;
2779 GLuint number_of_operands;
2780
2781 if(8 == pAsm->unAsic)
2782 {
2783 number_of_operands = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2784 }
2785 else
2786 {
2787 number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2788 }
2789
2790 for (src=0; src<number_of_operands; src++)
2791 {
2792 get_src_properties(alu_instruction_ptr,
2793 src,
2794 &(src_sel[src]),
2795 &(src_rel[src]),
2796 &(src_chan[src]),
2797 &(src_neg[src]) );
2798 }
2799
2800
2801 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2802 (is_const( src_sel[1] ) ? 2 : 0) +
2803 (is_const( src_sel[2] ) ? 1 : 0)
2804 );
2805
2806 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2807
2808 for (src=0; src<number_of_operands; src++)
2809 {
2810 sel = src_sel [src];
2811 chan = src_chan[src];
2812 rel = src_rel [src];
2813 neg = src_neg [src];
2814
2815
2816 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2817
2818 if( is_gpr(sel) )
2819 {
2820 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2821 {
2822 return GL_FALSE;
2823 }
2824
2825 if ( (src == 1) &&
2826 (sel == src_sel[0]) &&
2827 (chan == src_chan[0]) )
2828 {
2829 }
2830 else
2831 {
2832 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2833 {
2834 return GL_FALSE;
2835 }
2836 }
2837 }
2838 else if( is_const(sel) )
2839 {
2840 const_count++;
2841
2842 if( is_cfile(sel) )
2843 {
2844 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2845 {
2846 return GL_FALSE;
2847 }
2848 }
2849 }
2850 }
2851
2852 return GL_TRUE;
2853 }
2854
2855 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2856 {
2857 R700ALUInstruction * alu_instruction_ptr = NULL;
2858 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2859 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2860
2861 GLuint number_of_scalar_operations;
2862 GLboolean is_single_scalar_operation;
2863 GLuint scalar_channel_index;
2864
2865 PVSSRC * pcurrent_source;
2866 int current_source_index;
2867 GLuint contiguous_slots_needed;
2868 GLuint uNumSrc;
2869 GLboolean bSplitInst;
2870
2871 if(8 == pAsm->unAsic)
2872 {
2873 uNumSrc = EG_GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2874 }
2875 else
2876 {
2877 uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2878 }
2879
2880 //GLuint channel_swizzle, j;
2881 //GLuint chan_counter[4] = {0, 0, 0, 0};
2882 //PVSSRC * pSource[3];
2883 bSplitInst = GL_FALSE;
2884 pAsm->kcacheUsed = 0;
2885
2886 if (1 == pAsm->D.dst.math)
2887 {
2888 is_single_scalar_operation = GL_TRUE;
2889 number_of_scalar_operations = 1;
2890 }
2891 else
2892 {
2893 is_single_scalar_operation = GL_FALSE;
2894 number_of_scalar_operations = 4;
2895
2896 /* current assembler doesn't do more than 1 register per source */
2897 #if 0
2898 /* check read port, only very preliminary algorithm, not count in
2899 src0/1 same comp case and prev slot repeat case; also not count relative
2900 addressing. TODO: improve performance. */
2901 for(j=0; j<uNumSrc; j++)
2902 {
2903 pSource[j] = &(pAsm->S[j].src);
2904 }
2905 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2906 {
2907 for(j=0; j<uNumSrc; j++)
2908 {
2909 switch (scalar_channel_index)
2910 {
2911 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2912 case 1: channel_swizzle = pSource[j]->swizzley; break;
2913 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2914 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2915 default: channel_swizzle = SQ_SEL_MASK; break;
2916 }
2917 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2918 (pSource[j]->rtype == SRC_REG_GPR))
2919 && (channel_swizzle <= SQ_SEL_W) )
2920 {
2921 chan_counter[channel_swizzle]++;
2922 }
2923 }
2924 }
2925 if( (chan_counter[SQ_SEL_X] > 3)
2926 || (chan_counter[SQ_SEL_Y] > 3)
2927 || (chan_counter[SQ_SEL_Z] > 3)
2928 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2929 {
2930 bSplitInst = GL_TRUE;
2931 }
2932 #endif
2933 }
2934
2935 contiguous_slots_needed = 0;
2936
2937 if(!is_single_scalar_operation)
2938 {
2939 contiguous_slots_needed = 4;
2940 }
2941
2942 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2943
2944 initialize(pAsm);
2945
2946 for (scalar_channel_index=0;
2947 scalar_channel_index < number_of_scalar_operations;
2948 scalar_channel_index++)
2949 {
2950 if(scalar_channel_index == (number_of_scalar_operations-1))
2951 {
2952 switch(pAsm->D2.dst2.literal_slots)
2953 {
2954 case 0:
2955 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2956 Init_R700ALUInstruction(alu_instruction_ptr);
2957 break;
2958 case 1:
2959 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2960 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2961 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2962 break;
2963 case 2:
2964 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2965 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2966 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2967 break;
2968 };
2969 }
2970 else
2971 {
2972 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2973 Init_R700ALUInstruction(alu_instruction_ptr);
2974 }
2975
2976 //src 0
2977 current_source_index = 0;
2978 pcurrent_source = &(pAsm->S[0].src);
2979
2980 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2981 current_source_index,
2982 pcurrent_source,
2983 scalar_channel_index,
2984 pAsm) )
2985 {
2986 return GL_FALSE;
2987 }
2988
2989 if (uNumSrc > 1)
2990 {
2991 // Process source 1
2992 current_source_index = 1;
2993 pcurrent_source = &(pAsm->S[current_source_index].src);
2994
2995 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2996 current_source_index,
2997 pcurrent_source,
2998 scalar_channel_index,
2999 pAsm) )
3000 {
3001 return GL_FALSE;
3002 }
3003 }
3004
3005 //other bits
3006 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
3007
3008 if( (is_single_scalar_operation == GL_TRUE)
3009 || (GL_TRUE == bSplitInst) )
3010 {
3011 alu_instruction_ptr->m_Word0.f.last = 1;
3012 }
3013 else
3014 {
3015 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
3016 }
3017
3018 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
3019 if(1 == pAsm->D.dst.predicated)
3020 {
3021 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
3022 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
3023 }
3024 else
3025 {
3026 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3027 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3028 }
3029
3030 // dst
3031 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
3032 (pAsm->D.dst.rtype == DST_REG_OUT) )
3033 {
3034 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
3035 }
3036 else
3037 {
3038 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
3039 return GL_FALSE;
3040 }
3041
3042 if ( ADDR_RELATIVE_A0 == addrmode_PVSDST(&(pAsm->D.dst)) )
3043 {
3044 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_RELATIVE;
3045 }
3046 else
3047 {
3048 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
3049 }
3050
3051 if ( is_single_scalar_operation == GL_TRUE )
3052 {
3053 // Override scalar_channel_index since only one scalar value will be written
3054 if(pAsm->D.dst.writex)
3055 {
3056 scalar_channel_index = 0;
3057 }
3058 else if(pAsm->D.dst.writey)
3059 {
3060 scalar_channel_index = 1;
3061 }
3062 else if(pAsm->D.dst.writez)
3063 {
3064 scalar_channel_index = 2;
3065 }
3066 else if(pAsm->D.dst.writew)
3067 {
3068 scalar_channel_index = 3;
3069 }
3070 }
3071
3072 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
3073
3074 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
3075
3076 if (pAsm->D.dst.op3)
3077 {
3078 //op3
3079
3080 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
3081
3082 //There's 3rd src for op3
3083 current_source_index = 2;
3084 pcurrent_source = &(pAsm->S[current_source_index].src);
3085
3086 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
3087 current_source_index,
3088 pcurrent_source,
3089 scalar_channel_index,
3090 pAsm) )
3091 {
3092 return GL_FALSE;
3093 }
3094 }
3095 else
3096 {
3097 //op2
3098 if (pAsm->bR6xx)
3099 {
3100 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
3101
3102 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
3103 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
3104
3105 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
3106 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
3107 switch (scalar_channel_index)
3108 {
3109 case 0:
3110 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
3111 break;
3112 case 1:
3113 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
3114 break;
3115 case 2:
3116 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
3117 break;
3118 case 3:
3119 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
3120 break;
3121 default:
3122 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
3123 break;
3124 }
3125 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
3126 }
3127 else
3128 {
3129 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
3130
3131 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
3132 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
3133
3134 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
3135 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
3136 switch (scalar_channel_index)
3137 {
3138 case 0:
3139 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
3140 break;
3141 case 1:
3142 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
3143 break;
3144 case 2:
3145 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
3146 break;
3147 case 3:
3148 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
3149 break;
3150 default:
3151 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
3152 break;
3153 }
3154 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
3155 }
3156 }
3157
3158 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
3159 {
3160 return GL_FALSE;
3161 }
3162
3163 /*
3164 * Judge the type of current instruction, is it vector or scalar
3165 * instruction.
3166 */
3167 if (is_single_scalar_operation)
3168 {
3169 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
3170 {
3171 return GL_FALSE;
3172 }
3173 }
3174 else
3175 {
3176 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
3177 {
3178 return GL_FALSE;
3179 }
3180 }
3181
3182 contiguous_slots_needed -= 1;
3183 }
3184
3185 return GL_TRUE;
3186 }
3187
3188 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3189 {
3190 BITS tmp;
3191
3192 checkop1(pAsm);
3193
3194 tmp = gethelpr(pAsm);
3195
3196 // opcode tmp.x, a.x
3197 // MOV dst, tmp.x
3198
3199 pAsm->D.dst.opcode = opcode;
3200 pAsm->D.dst.math = 1;
3201
3202 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3203 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3204 pAsm->D.dst.reg = tmp;
3205 pAsm->D.dst.writex = 1;
3206
3207 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3208 {
3209 return GL_FALSE;
3210 }
3211
3212 if( pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_RSQ )
3213 pAsm->S[0].src.abs = 1;
3214
3215 if ( GL_FALSE == next_ins(pAsm) )
3216 {
3217 return GL_FALSE;
3218 }
3219
3220 // Now replicate result to all necessary channels in destination
3221 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3222
3223 if( GL_FALSE == assemble_dst(pAsm) )
3224 {
3225 return GL_FALSE;
3226 }
3227
3228 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3229 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3230 pAsm->S[0].src.reg = tmp;
3231
3232 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3233 noneg_PVSSRC(&(pAsm->S[0].src));
3234
3235 if( GL_FALSE == next_ins(pAsm) )
3236 {
3237 return GL_FALSE;
3238 }
3239
3240 return GL_TRUE;
3241 }
3242
3243 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3244 {
3245 checkop1(pAsm);
3246
3247 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3248
3249 if( GL_FALSE == assemble_dst(pAsm) )
3250 {
3251 return GL_FALSE;
3252 }
3253 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3254 {
3255 return GL_FALSE;
3256 }
3257
3258 pAsm->S[1].bits = pAsm->S[0].bits;
3259 flipneg_PVSSRC(&(pAsm->S[1].src));
3260
3261 if ( GL_FALSE == next_ins(pAsm) )
3262 {
3263 return GL_FALSE;
3264 }
3265
3266 return GL_TRUE;
3267 }
3268
3269 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3270 {
3271 if( GL_FALSE == checkop2(pAsm) )
3272 {
3273 return GL_FALSE;
3274 }
3275
3276 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3277
3278 if( GL_FALSE == assemble_dst(pAsm) )
3279 {
3280 return GL_FALSE;
3281 }
3282
3283 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3284 {
3285 return GL_FALSE;
3286 }
3287
3288 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3289 {
3290 return GL_FALSE;
3291 }
3292
3293 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3294 {
3295 flipneg_PVSSRC(&(pAsm->S[1].src));
3296 }
3297
3298 if( GL_FALSE == next_ins(pAsm) )
3299 {
3300 return GL_FALSE;
3301 }
3302
3303 return GL_TRUE;
3304 }
3305
3306 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3307 { /* TODO: ar values dont' persist between clauses */
3308 if( GL_FALSE == checkop1(pAsm) )
3309 {
3310 return GL_FALSE;
3311 }
3312
3313 if(8 == pAsm->unAsic)
3314 {
3315 /* Evergreen */
3316
3317 /* Float to Signed Integer Using FLOOR */
3318 pAsm->D.dst.opcode = EG_OP2_INST_FLT_TO_INT_FLOOR;
3319 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3320 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3321 pAsm->D.dst.reg = 0;
3322 pAsm->D.dst.writex = 0;
3323 pAsm->D.dst.writey = 0;
3324 pAsm->D.dst.writez = 0;
3325 pAsm->D.dst.writew = 0;
3326
3327 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3328 {
3329 return GL_FALSE;
3330 }
3331
3332 if( GL_FALSE == next_ins(pAsm) )
3333 {
3334 return GL_FALSE;
3335 }
3336
3337 /* Copy Signed Integer To Integer in AR and GPR */
3338 pAsm->D.dst.opcode = EG_OP2_INST_MOVA_INT;
3339 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3340 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3341 pAsm->D.dst.reg = 0;
3342 pAsm->D.dst.writex = 0;
3343 pAsm->D.dst.writey = 0;
3344 pAsm->D.dst.writez = 0;
3345 pAsm->D.dst.writew = 0;
3346
3347 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3348 {
3349 return GL_FALSE;
3350 }
3351
3352 if( GL_FALSE == next_ins(pAsm) )
3353 {
3354 return GL_FALSE;
3355 }
3356 }
3357 else
3358 {
3359 /* r6xx/r7xx */
3360
3361 /* Truncate floating-point to the nearest integer
3362 in the range [-256, +255], and copy to AR and
3363 to a GPR.
3364 */
3365 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3366 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3367 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3368 pAsm->D.dst.reg = 0;
3369 pAsm->D.dst.writex = 0;
3370 pAsm->D.dst.writey = 0;
3371 pAsm->D.dst.writez = 0;
3372 pAsm->D.dst.writew = 0;
3373
3374 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3375 {
3376 return GL_FALSE;
3377 }
3378
3379 if( GL_FALSE == next_ins(pAsm) )
3380 {
3381 return GL_FALSE;
3382 }
3383 }
3384
3385 return GL_TRUE;
3386 }
3387
3388 GLboolean assemble_BAD(char *opcode_str)
3389 {
3390 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
3391 return GL_FALSE;
3392 }
3393
3394 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3395 {
3396 int tmp;
3397
3398 if( GL_FALSE == checkop3(pAsm) )
3399 {
3400 return GL_FALSE;
3401 }
3402
3403 if(8 == pAsm->unAsic)
3404 {
3405 pAsm->D.dst.opcode = EG_OP3_INST_CNDGE;
3406 }
3407 else
3408 {
3409 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3410 }
3411 pAsm->D.dst.op3 = 1;
3412
3413 tmp = (-1);
3414
3415 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3416 {
3417 //OP3 has no support for write mask
3418 tmp = gethelpr(pAsm);
3419
3420 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3421 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3422 pAsm->D.dst.reg = tmp;
3423
3424 nomask_PVSDST(&(pAsm->D.dst));
3425 }
3426 else
3427 {
3428 if( GL_FALSE == assemble_dst(pAsm) )
3429 {
3430 return GL_FALSE;
3431 }
3432 }
3433
3434 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3435 {
3436 return GL_FALSE;
3437 }
3438
3439 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
3440 {
3441 return GL_FALSE;
3442 }
3443
3444 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
3445 {
3446 return GL_FALSE;
3447 }
3448
3449 if ( GL_FALSE == next_ins(pAsm) )
3450 {
3451 return GL_FALSE;
3452 }
3453
3454 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3455 {
3456 if( GL_FALSE == assemble_dst(pAsm) )
3457 {
3458 return GL_FALSE;
3459 }
3460
3461 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3462
3463 //tmp for source
3464 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3465 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3466 pAsm->S[0].src.reg = tmp;
3467
3468 noneg_PVSSRC(&(pAsm->S[0].src));
3469 noswizzle_PVSSRC(&(pAsm->S[0].src));
3470
3471 if( GL_FALSE == next_ins(pAsm) )
3472 {
3473 return GL_FALSE;
3474 }
3475 }
3476
3477 return GL_TRUE;
3478 }
3479
3480 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
3481 {
3482 /*
3483 * r600 - trunc to -PI..PI range
3484 * r700 - normalize by dividing by 2PI
3485 * see fdo bug 27901
3486 */
3487
3488 int tmp;
3489 checkop1(pAsm);
3490
3491 tmp = gethelpr(pAsm);
3492 if(8 == pAsm->unAsic)
3493 {
3494 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3495 }
3496 else
3497 {
3498 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3499 }
3500 pAsm->D.dst.op3 = 1;
3501
3502 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3503 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3504 pAsm->D.dst.reg = tmp;
3505
3506 assemble_src(pAsm, 0, -1);
3507
3508 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3509 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3510
3511 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3512 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3513
3514 pAsm->D2.dst2.literal_slots = 1;
3515 pAsm->C[0].f = 1/(3.1415926535 * 2);
3516 pAsm->C[1].f = 0.5f;
3517
3518 if ( GL_FALSE == next_ins(pAsm) )
3519 {
3520 return GL_FALSE;
3521 }
3522
3523 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3524
3525 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3526 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3527 pAsm->D.dst.reg = tmp;
3528 pAsm->D.dst.writex = 1;
3529
3530 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3531 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3532 pAsm->S[0].src.reg = tmp;
3533 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3534
3535 if(( GL_FALSE == next_ins(pAsm) ))
3536 {
3537 return GL_FALSE;
3538 }
3539 if(8 == pAsm->unAsic)
3540 {
3541 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
3542 }
3543 else
3544 {
3545 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3546 }
3547 pAsm->D.dst.op3 = 1;
3548
3549 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3550 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3551 pAsm->D.dst.reg = tmp;
3552
3553 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3554 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3555 pAsm->S[0].src.reg = tmp;
3556 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3557
3558 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
3559 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3560
3561 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
3562 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
3563
3564 pAsm->D2.dst2.literal_slots = 1;
3565
3566 if (pAsm->bR6xx)
3567 {
3568 pAsm->C[0].f = 3.1415926535897f * 2.0f;
3569 pAsm->C[1].f = -3.1415926535897f;
3570 }
3571 else
3572 {
3573 pAsm->C[0].f = 1.0f;
3574 pAsm->C[1].f = -0.5f;
3575 }
3576
3577 if(( GL_FALSE == next_ins(pAsm) ))
3578 {
3579 return GL_FALSE;
3580 }
3581
3582 pAsm->D.dst.opcode = opcode;
3583 pAsm->D.dst.math = 1;
3584
3585 assemble_dst(pAsm);
3586
3587 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3588 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3589 pAsm->S[0].src.reg = tmp;
3590 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3591 noneg_PVSSRC(&(pAsm->S[0].src));
3592
3593 next_ins(pAsm);
3594
3595 //TODO - replicate if more channels set in WriteMask
3596 return GL_TRUE;
3597
3598 }
3599
3600 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3601 {
3602 if( GL_FALSE == checkop2(pAsm) )
3603 {
3604 return GL_FALSE;
3605 }
3606
3607 if(8 == pAsm->unAsic)
3608 {
3609 pAsm->D.dst.opcode = EG_OP2_INST_DOT4;
3610 }
3611 else
3612 {
3613 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
3614 }
3615
3616 if( GL_FALSE == assemble_dst(pAsm) )
3617 {
3618 return GL_FALSE;
3619 }
3620
3621 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3622 {
3623 return GL_FALSE;
3624 }
3625
3626 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3627 {
3628 return GL_FALSE;
3629 }
3630
3631 if(OPCODE_DP2 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3632 {
3633 zerocomp_PVSSRC(&(pAsm->S[0].src),2);
3634 zerocomp_PVSSRC(&(pAsm->S[0].src),3);
3635 zerocomp_PVSSRC(&(pAsm->S[1].src),2);
3636 zerocomp_PVSSRC(&(pAsm->S[1].src),3);
3637 }
3638 else if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3639 {
3640 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3641 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3642 }
3643 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3644 {
3645 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3646 }
3647
3648 if ( GL_FALSE == next_ins(pAsm) )
3649 {
3650 return GL_FALSE;
3651 }
3652
3653 return GL_TRUE;
3654 }
3655
3656 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3657 {
3658 if( GL_FALSE == checkop2(pAsm) )
3659 {
3660 return GL_FALSE;
3661 }
3662
3663 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3664
3665 if( GL_FALSE == assemble_dst(pAsm) )
3666 {
3667 return GL_FALSE;
3668 }
3669
3670 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3671 {
3672 return GL_FALSE;
3673 }
3674
3675 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3676 {
3677 return GL_FALSE;
3678 }
3679
3680 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3681 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3682
3683 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3684 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3685
3686 if ( GL_FALSE == next_ins(pAsm) )
3687 {
3688 return GL_FALSE;
3689 }
3690
3691 return GL_TRUE;
3692 }
3693
3694 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3695 {
3696 if(8 == pAsm->unAsic)
3697 {
3698 return assemble_math_function(pAsm, EG_OP2_INST_EXP_IEEE);
3699 }
3700
3701 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3702 }
3703
3704 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3705 {
3706 BITS tmp;
3707
3708 checkop1(pAsm);
3709
3710 tmp = gethelpr(pAsm);
3711
3712 // FLOOR tmp.x, a.x
3713 // EX2 dst.x tmp.x
3714
3715 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3716 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3717
3718 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3719 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3720 pAsm->D.dst.reg = tmp;
3721 pAsm->D.dst.writex = 1;
3722
3723 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3724 {
3725 return GL_FALSE;
3726 }
3727
3728 if( GL_FALSE == next_ins(pAsm) )
3729 {
3730 return GL_FALSE;
3731 }
3732
3733 if(8 == pAsm->unAsic)
3734 {
3735 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3736 }
3737 else
3738 {
3739 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3740 }
3741 pAsm->D.dst.math = 1;
3742
3743 if( GL_FALSE == assemble_dst(pAsm) )
3744 {
3745 return GL_FALSE;
3746 }
3747
3748 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3749
3750 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3751 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3752 pAsm->S[0].src.reg = tmp;
3753
3754 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3755 noneg_PVSSRC(&(pAsm->S[0].src));
3756
3757 if( GL_FALSE == next_ins(pAsm) )
3758 {
3759 return GL_FALSE;
3760 }
3761 }
3762
3763 // FRACT dst.y a.x
3764
3765 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3766 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3767
3768 if( GL_FALSE == assemble_dst(pAsm) )
3769 {
3770 return GL_FALSE;
3771 }
3772
3773 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3774 {
3775 return GL_FALSE;
3776 }
3777
3778 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3779
3780 if( GL_FALSE == next_ins(pAsm) )
3781 {
3782 return GL_FALSE;
3783 }
3784 }
3785
3786 // EX2 dst.z, a.x
3787
3788 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3789 if(8 == pAsm->unAsic)
3790 {
3791 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
3792 }
3793 else
3794 {
3795 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3796 }
3797 pAsm->D.dst.math = 1;
3798
3799 if( GL_FALSE == assemble_dst(pAsm) )
3800 {
3801 return GL_FALSE;
3802 }
3803
3804 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3805 {
3806 return GL_FALSE;
3807 }
3808
3809 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3810
3811 if( GL_FALSE == next_ins(pAsm) )
3812 {
3813 return GL_FALSE;
3814 }
3815 }
3816
3817 // MOV dst.w 1.0
3818
3819 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3820 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3821
3822 if( GL_FALSE == assemble_dst(pAsm) )
3823 {
3824 return GL_FALSE;
3825 }
3826
3827 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3828
3829 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3830 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3831 pAsm->S[0].src.reg = tmp;
3832
3833 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3834 noneg_PVSSRC(&(pAsm->S[0].src));
3835
3836 if( GL_FALSE == next_ins(pAsm) )
3837 {
3838 return GL_FALSE;
3839 }
3840 }
3841
3842 return GL_TRUE;
3843 }
3844
3845 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3846 {
3847 checkop1(pAsm);
3848
3849 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3850
3851 if ( GL_FALSE == assemble_dst(pAsm) )
3852 {
3853 return GL_FALSE;
3854 }
3855
3856 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3857 {
3858 return GL_FALSE;
3859 }
3860
3861 if ( GL_FALSE == next_ins(pAsm) )
3862 {
3863 return GL_FALSE;
3864 }
3865
3866 return GL_TRUE;
3867 }
3868
3869 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3870 {
3871 if(8 == pAsm->unAsic)
3872 {
3873 return assemble_math_function(pAsm, EG_OP2_INST_FLT_TO_INT);
3874 }
3875
3876 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3877 }
3878
3879 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3880 {
3881 checkop1(pAsm);
3882
3883 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3884
3885 if ( GL_FALSE == assemble_dst(pAsm) )
3886 {
3887 return GL_FALSE;
3888 }
3889
3890 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3891 {
3892 return GL_FALSE;
3893 }
3894
3895 if ( GL_FALSE == next_ins(pAsm) )
3896 {
3897 return GL_FALSE;
3898 }
3899
3900 return GL_TRUE;
3901 }
3902
3903 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3904 {
3905 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3906
3907 if(pILInst->Opcode == OPCODE_KIL)
3908 checkop1(pAsm);
3909
3910 pAsm->D.dst.opcode = opcode;
3911 //pAsm->D.dst.math = 1;
3912
3913 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3914 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3915 pAsm->D.dst.reg = 0;
3916 pAsm->D.dst.writex = 0;
3917 pAsm->D.dst.writey = 0;
3918 pAsm->D.dst.writez = 0;
3919 pAsm->D.dst.writew = 0;
3920
3921 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3922 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3923 pAsm->S[0].src.reg = 0;
3924 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3925 noneg_PVSSRC(&(pAsm->S[0].src));
3926
3927 if(pILInst->Opcode == OPCODE_KIL_NV)
3928 {
3929 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3930 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3931 pAsm->S[1].src.reg = 0;
3932 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3933 neg_PVSSRC(&(pAsm->S[1].src));
3934 }
3935 else
3936 {
3937 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3938 {
3939 return GL_FALSE;
3940 }
3941
3942 }
3943
3944 if ( GL_FALSE == next_ins(pAsm) )
3945 {
3946 return GL_FALSE;
3947 }
3948
3949 /* Doc says KILL has to be last(end) ALU clause */
3950 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3951 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3952
3953 return GL_TRUE;
3954 }
3955
3956 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3957 {
3958 if(8 == pAsm->unAsic)
3959 {
3960 return assemble_math_function(pAsm, EG_OP2_INST_LOG_IEEE);
3961 }
3962
3963 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3964 }
3965
3966 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3967 {
3968 BITS tmp;
3969
3970 if( GL_FALSE == checkop3(pAsm) )
3971 {
3972 return GL_FALSE;
3973 }
3974
3975 tmp = gethelpr(pAsm);
3976
3977 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3978
3979 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3980 pAsm->D.dst.reg = tmp;
3981 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3982 nomask_PVSDST(&(pAsm->D.dst));
3983
3984
3985 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3986 {
3987 return GL_FALSE;
3988 }
3989
3990 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3991 {
3992 return GL_FALSE;
3993 }
3994
3995 neg_PVSSRC(&(pAsm->S[1].src));
3996
3997 if( GL_FALSE == next_ins(pAsm) )
3998 {
3999 return GL_FALSE;
4000 }
4001
4002 if(8 == pAsm->unAsic)
4003 {
4004 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4005 }
4006 else
4007 {
4008 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4009 }
4010 pAsm->D.dst.op3 = 1;
4011
4012 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4013 pAsm->D.dst.reg = tmp;
4014 nomask_PVSDST(&(pAsm->D.dst));
4015 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4016
4017 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4018 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4019 pAsm->S[0].src.reg = tmp;
4020 noswizzle_PVSSRC(&(pAsm->S[0].src));
4021
4022
4023 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4024 {
4025 return GL_FALSE;
4026 }
4027
4028 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
4029 {
4030 return GL_FALSE;
4031 }
4032
4033 if( GL_FALSE == next_ins(pAsm) )
4034 {
4035 return GL_FALSE;
4036 }
4037
4038 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4039
4040 if( GL_FALSE == assemble_dst(pAsm) )
4041 {
4042 return GL_FALSE;
4043 }
4044
4045 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4046 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4047 pAsm->S[0].src.reg = tmp;
4048 noswizzle_PVSSRC(&(pAsm->S[0].src));
4049
4050 if( GL_FALSE == next_ins(pAsm) )
4051 {
4052 return GL_FALSE;
4053 }
4054
4055 return GL_TRUE;
4056 }
4057
4058 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
4059 {
4060 BITS tmp1, tmp2, tmp3;
4061
4062 checkop1(pAsm);
4063
4064 tmp1 = gethelpr(pAsm);
4065 tmp2 = gethelpr(pAsm);
4066 tmp3 = gethelpr(pAsm);
4067
4068 // FIXME: The hardware can do fabs() directly on input
4069 // elements, but the compiler doesn't have the
4070 // capability to use that.
4071
4072 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
4073
4074 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4075
4076 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4077 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4078 pAsm->D.dst.reg = tmp1;
4079 pAsm->D.dst.writex = 1;
4080
4081 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4082 {
4083 return GL_FALSE;
4084 }
4085
4086 pAsm->S[1].bits = pAsm->S[0].bits;
4087 flipneg_PVSSRC(&(pAsm->S[1].src));
4088
4089 if ( GL_FALSE == next_ins(pAsm) )
4090 {
4091 return GL_FALSE;
4092 }
4093
4094 // Entire algo:
4095 //
4096 // LG2 tmp2.x, tmp1.x
4097 // FLOOR tmp3.x, tmp2.x
4098 // MOV dst.x, tmp3.x
4099 // ADD tmp3.x, tmp2.x, -tmp3.x
4100 // EX2 dst.y, tmp3.x
4101 // MOV dst.z, tmp2.x
4102 // MOV dst.w, 1.0
4103
4104 // LG2 tmp2.x, tmp1.x
4105 // FLOOR tmp3.x, tmp2.x
4106
4107 if(8 == pAsm->unAsic)
4108 {
4109 pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4110 }
4111 else
4112 {
4113 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4114 }
4115 pAsm->D.dst.math = 1;
4116
4117 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4118 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4119 pAsm->D.dst.reg = tmp2;
4120 pAsm->D.dst.writex = 1;
4121
4122 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4123 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4124 pAsm->S[0].src.reg = tmp1;
4125
4126 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4127 noneg_PVSSRC(&(pAsm->S[0].src));
4128
4129 if( GL_FALSE == next_ins(pAsm) )
4130 {
4131 return GL_FALSE;
4132 }
4133
4134 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
4135
4136 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4137 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4138 pAsm->D.dst.reg = tmp3;
4139 pAsm->D.dst.writex = 1;
4140
4141 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4142 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4143 pAsm->S[0].src.reg = tmp2;
4144
4145 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4146 noneg_PVSSRC(&(pAsm->S[0].src));
4147
4148 if( GL_FALSE == next_ins(pAsm) )
4149 {
4150 return GL_FALSE;
4151 }
4152
4153 // MOV dst.x, tmp3.x
4154
4155 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4156
4157 if( GL_FALSE == assemble_dst(pAsm) )
4158 {
4159 return GL_FALSE;
4160 }
4161
4162 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4163
4164 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4165 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4166 pAsm->S[0].src.reg = tmp3;
4167
4168 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4169 noneg_PVSSRC(&(pAsm->S[0].src));
4170
4171 if( GL_FALSE == next_ins(pAsm) )
4172 {
4173 return GL_FALSE;
4174 }
4175
4176 // ADD tmp3.x, tmp2.x, -tmp3.x
4177 // EX2 dst.y, tmp3.x
4178
4179 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4180
4181 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4182 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4183 pAsm->D.dst.reg = tmp3;
4184 pAsm->D.dst.writex = 1;
4185
4186 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4187 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4188 pAsm->S[0].src.reg = tmp2;
4189
4190 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4191 noneg_PVSSRC(&(pAsm->S[0].src));
4192
4193 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4194 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
4195 pAsm->S[1].src.reg = tmp3;
4196
4197 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4198 neg_PVSSRC(&(pAsm->S[1].src));
4199
4200 if( GL_FALSE == next_ins(pAsm) )
4201 {
4202 return GL_FALSE;
4203 }
4204
4205 if(8 == pAsm->unAsic)
4206 {
4207 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4208 }
4209 else
4210 {
4211 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4212 }
4213 pAsm->D.dst.math = 1;
4214
4215 if( GL_FALSE == assemble_dst(pAsm) )
4216 {
4217 return GL_FALSE;
4218 }
4219
4220 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4221
4222 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4223 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4224 pAsm->S[0].src.reg = tmp3;
4225
4226 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4227 noneg_PVSSRC(&(pAsm->S[0].src));
4228
4229 if( GL_FALSE == next_ins(pAsm) )
4230 {
4231 return GL_FALSE;
4232 }
4233
4234 // MOV dst.z, tmp2.x
4235
4236 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4237
4238 if( GL_FALSE == assemble_dst(pAsm) )
4239 {
4240 return GL_FALSE;
4241 }
4242
4243 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
4244
4245 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4246 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4247 pAsm->S[0].src.reg = tmp2;
4248
4249 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4250 noneg_PVSSRC(&(pAsm->S[0].src));
4251
4252 if( GL_FALSE == next_ins(pAsm) )
4253 {
4254 return GL_FALSE;
4255 }
4256
4257 // MOV dst.w 1.0
4258
4259 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4260
4261 if( GL_FALSE == assemble_dst(pAsm) )
4262 {
4263 return GL_FALSE;
4264 }
4265
4266 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
4267
4268 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4269 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4270 pAsm->S[0].src.reg = tmp1;
4271
4272 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
4273 noneg_PVSSRC(&(pAsm->S[0].src));
4274
4275 if( GL_FALSE == next_ins(pAsm) )
4276 {
4277 return GL_FALSE;
4278 }
4279
4280 return GL_TRUE;
4281 }
4282
4283 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
4284 {
4285 int tmp, ii;
4286 GLboolean bReplaceDst = GL_FALSE;
4287 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4288
4289 if( GL_FALSE == checkop3(pAsm) )
4290 {
4291 return GL_FALSE;
4292 }
4293
4294 if(8 == pAsm->unAsic)
4295 {
4296 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4297 }
4298 else
4299 {
4300 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4301 }
4302 pAsm->D.dst.op3 = 1;
4303
4304 tmp = (-1);
4305
4306 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
4307 { /* TODO : more investigation on MAD src and dst using same register */
4308 for(ii=0; ii<3; ii++)
4309 {
4310 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
4311 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
4312 {
4313 bReplaceDst = GL_TRUE;
4314 break;
4315 }
4316 }
4317 }
4318 if(0xF != pILInst->DstReg.WriteMask)
4319 { /* OP3 has no support for write mask */
4320 bReplaceDst = GL_TRUE;
4321 }
4322
4323 if(GL_TRUE == bReplaceDst)
4324 {
4325 tmp = gethelpr(pAsm);
4326
4327 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4328 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4329 pAsm->D.dst.reg = tmp;
4330
4331 nomask_PVSDST(&(pAsm->D.dst));
4332 }
4333 else
4334 {
4335 if( GL_FALSE == assemble_dst(pAsm) )
4336 {
4337 return GL_FALSE;
4338 }
4339 }
4340
4341 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4342 {
4343 return GL_FALSE;
4344 }
4345
4346 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4347 {
4348 return GL_FALSE;
4349 }
4350
4351 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
4352 {
4353 return GL_FALSE;
4354 }
4355
4356 if ( GL_FALSE == next_ins(pAsm) )
4357 {
4358 return GL_FALSE;
4359 }
4360
4361 if (GL_TRUE == bReplaceDst)
4362 {
4363 if( GL_FALSE == assemble_dst(pAsm) )
4364 {
4365 return GL_FALSE;
4366 }
4367
4368 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4369
4370 //tmp for source
4371 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4372 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4373 pAsm->S[0].src.reg = tmp;
4374
4375 noneg_PVSSRC(&(pAsm->S[0].src));
4376 noswizzle_PVSSRC(&(pAsm->S[0].src));
4377
4378 if( GL_FALSE == next_ins(pAsm) )
4379 {
4380 return GL_FALSE;
4381 }
4382 }
4383
4384 return GL_TRUE;
4385 }
4386
4387 /* LIT dst, src */
4388 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
4389 {
4390 unsigned int dstReg;
4391 unsigned int dstType;
4392 checkop1(pAsm);
4393 int tmp = gethelpr(pAsm);
4394
4395 if( GL_FALSE == assemble_dst(pAsm) )
4396 {
4397 return GL_FALSE;
4398 }
4399 dstReg = pAsm->D.dst.reg;
4400 dstType = pAsm->D.dst.rtype;
4401
4402 /* dst.xw, <- 1.0 */
4403 if( pAsm->D.dst.writex || pAsm->D.dst.writew )
4404 {
4405 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4406 {
4407 return GL_FALSE;
4408 }
4409
4410 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4411 pAsm->D.dst.writey = 0;
4412 pAsm->D.dst.writez = 0;
4413 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4414 pAsm->S[0].src.reg = tmp;
4415 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4416 noneg_PVSSRC(&(pAsm->S[0].src));
4417 pAsm->S[0].src.swizzlex = SQ_SEL_1;
4418 pAsm->S[0].src.swizzley = SQ_SEL_1;
4419 pAsm->S[0].src.swizzlez = SQ_SEL_1;
4420 pAsm->S[0].src.swizzlew = SQ_SEL_1;
4421 if( GL_FALSE == next_ins(pAsm) )
4422 {
4423 return GL_FALSE;
4424 }
4425 }
4426
4427 if( GL_FALSE == assemble_dst(pAsm) )
4428 {
4429 return GL_FALSE;
4430 }
4431
4432 if( pAsm->D.dst.writey ) {
4433
4434 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4435 {
4436 return GL_FALSE;
4437 }
4438
4439 /* dst.y = max(src.x, 0.0) */
4440 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4441 pAsm->D.dst.writex = 0;
4442 pAsm->D.dst.writey = 1;
4443 pAsm->D.dst.writez = 0;
4444 pAsm->D.dst.writew = 0;
4445 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4446 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4447 pAsm->S[1].src.reg = tmp;
4448 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4449 noneg_PVSSRC(&(pAsm->S[1].src));
4450 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4451 pAsm->S[1].src.swizzley = SQ_SEL_0;
4452 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4453 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4454 if( GL_FALSE == next_ins(pAsm) )
4455 {
4456 return GL_FALSE;
4457 }
4458 }
4459
4460 if( GL_FALSE == assemble_dst(pAsm) )
4461 {
4462 return GL_FALSE;
4463 }
4464 if ( pAsm->D.dst.writez) {
4465
4466 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4467 {
4468 return GL_FALSE;
4469 }
4470
4471 /* dst.z = log(src.y) */
4472 if(8 == pAsm->unAsic)
4473 {
4474 pAsm->D.dst.opcode = EG_OP2_INST_LOG_CLAMPED;
4475 }
4476 else
4477 {
4478 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
4479 }
4480 pAsm->D.dst.math = 1;
4481 pAsm->D.dst.writex = 0;
4482 pAsm->D.dst.writey = 0;
4483 pAsm->D.dst.writez = 1;
4484 pAsm->D.dst.writew = 0;
4485 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4486 if( GL_FALSE == next_ins(pAsm) )
4487 {
4488 return GL_FALSE;
4489 }
4490
4491 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4492 {
4493 return GL_FALSE;
4494 }
4495
4496 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4497 {
4498 return GL_FALSE;
4499 }
4500
4501 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4502
4503 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4504
4505 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4506 if(8 == pAsm->unAsic)
4507 {
4508 pAsm->D.dst.opcode = EG_OP3_INST_MUL_LIT;
4509 }
4510 else
4511 {
4512 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4513 }
4514 pAsm->D.dst.math = 1;
4515 pAsm->D.dst.op3 = 1;
4516 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4517 pAsm->D.dst.reg = tmp;
4518 pAsm->D.dst.writex = 1;
4519 pAsm->D.dst.writey = 0;
4520 pAsm->D.dst.writez = 0;
4521 pAsm->D.dst.writew = 0;
4522
4523
4524 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4525 pAsm->S[1].src.reg = dstReg;
4526 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4527 noneg_PVSSRC(&(pAsm->S[1].src));
4528 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4529 pAsm->S[1].src.swizzley = SQ_SEL_Z;
4530 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4531 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4532
4533 if( GL_FALSE == next_ins(pAsm) )
4534 {
4535 return GL_FALSE;
4536 }
4537
4538 /* dst.z = exp(tmp.x) */
4539 if( GL_FALSE == assemble_dst(pAsm) )
4540 {
4541 return GL_FALSE;
4542 }
4543 if(8 == pAsm->unAsic)
4544 {
4545 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4546 }
4547 else
4548 {
4549 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4550 }
4551 pAsm->D.dst.math = 1;
4552 pAsm->D.dst.writex = 0;
4553 pAsm->D.dst.writey = 0;
4554 pAsm->D.dst.writez = 1;
4555 pAsm->D.dst.writew = 0;
4556
4557 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4558 pAsm->S[0].src.reg = tmp;
4559 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4560 noneg_PVSSRC(&(pAsm->S[0].src));
4561 pAsm->S[0].src.swizzlex = SQ_SEL_X;
4562 pAsm->S[0].src.swizzley = SQ_SEL_X;
4563 pAsm->S[0].src.swizzlez = SQ_SEL_X;
4564 pAsm->S[0].src.swizzlew = SQ_SEL_X;
4565
4566 if( GL_FALSE == next_ins(pAsm) )
4567 {
4568 return GL_FALSE;
4569 }
4570 }
4571 return GL_TRUE;
4572 }
4573
4574 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
4575 {
4576 if( GL_FALSE == checkop2(pAsm) )
4577 {
4578 return GL_FALSE;
4579 }
4580
4581 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4582
4583 if( GL_FALSE == assemble_dst(pAsm) )
4584 {
4585 return GL_FALSE;
4586 }
4587
4588 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4589 {
4590 return GL_FALSE;
4591 }
4592
4593 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4594 {
4595 return GL_FALSE;
4596 }
4597
4598 if( GL_FALSE == next_ins(pAsm) )
4599 {
4600 return GL_FALSE;
4601 }
4602
4603 return GL_TRUE;
4604 }
4605
4606 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
4607 {
4608 if( GL_FALSE == checkop2(pAsm) )
4609 {
4610 return GL_FALSE;
4611 }
4612
4613 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
4614
4615 if( GL_FALSE == assemble_dst(pAsm) )
4616 {
4617 return GL_FALSE;
4618 }
4619
4620 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4621 {
4622 return GL_FALSE;
4623 }
4624
4625 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4626 {
4627 return GL_FALSE;
4628 }
4629
4630 if( GL_FALSE == next_ins(pAsm) )
4631 {
4632 return GL_FALSE;
4633 }
4634
4635 return GL_TRUE;
4636 }
4637
4638 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
4639 {
4640 checkop1(pAsm);
4641
4642 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4643
4644 if (GL_FALSE == assemble_dst(pAsm))
4645 {
4646 return GL_FALSE;
4647 }
4648
4649 if (GL_FALSE == assemble_src(pAsm, 0, -1))
4650 {
4651 return GL_FALSE;
4652 }
4653
4654 if ( GL_FALSE == next_ins(pAsm) )
4655 {
4656 return GL_FALSE;
4657 }
4658
4659 return GL_TRUE;
4660 }
4661
4662 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
4663 {
4664 if( GL_FALSE == checkop2(pAsm) )
4665 {
4666 return GL_FALSE;
4667 }
4668
4669 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4670
4671 if( GL_FALSE == assemble_dst(pAsm) )
4672 {
4673 return GL_FALSE;
4674 }
4675
4676 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4677 {
4678 return GL_FALSE;
4679 }
4680
4681 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4682 {
4683 return GL_FALSE;
4684 }
4685
4686 if( GL_FALSE == next_ins(pAsm) )
4687 {
4688 return GL_FALSE;
4689 }
4690
4691 return GL_TRUE;
4692 }
4693
4694 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
4695 {
4696 BITS tmp;
4697
4698 checkop1(pAsm);
4699
4700 tmp = gethelpr(pAsm);
4701
4702 // LG2 tmp.x, a.swizzle
4703 if(8 == pAsm->unAsic)
4704 {
4705 pAsm->D.dst.opcode = EG_OP2_INST_LOG_IEEE;
4706 }
4707 else
4708 {
4709 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4710 }
4711 pAsm->D.dst.math = 1;
4712
4713 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4714 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4715 pAsm->D.dst.reg = tmp;
4716 nomask_PVSDST(&(pAsm->D.dst));
4717
4718 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4719 {
4720 return GL_FALSE;
4721 }
4722
4723 if( GL_FALSE == next_ins(pAsm) )
4724 {
4725 return GL_FALSE;
4726 }
4727
4728 // MUL tmp.x, tmp.x, b.swizzle
4729 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4730
4731 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4732 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4733 pAsm->D.dst.reg = tmp;
4734 nomask_PVSDST(&(pAsm->D.dst));
4735
4736 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4737 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4738 pAsm->S[0].src.reg = tmp;
4739 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4740 noneg_PVSSRC(&(pAsm->S[0].src));
4741
4742 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4743 {
4744 return GL_FALSE;
4745 }
4746
4747 if( GL_FALSE == next_ins(pAsm) )
4748 {
4749 return GL_FALSE;
4750 }
4751
4752 // EX2 dst.mask, tmp.x
4753 // EX2 tmp.x, tmp.x
4754 if(8 == pAsm->unAsic)
4755 {
4756 pAsm->D.dst.opcode = EG_OP2_INST_EXP_IEEE;
4757 }
4758 else
4759 {
4760 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4761 }
4762 pAsm->D.dst.math = 1;
4763
4764 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4765 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4766 pAsm->D.dst.reg = tmp;
4767 nomask_PVSDST(&(pAsm->D.dst));
4768
4769 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4770 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4771 pAsm->S[0].src.reg = tmp;
4772 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4773 noneg_PVSSRC(&(pAsm->S[0].src));
4774
4775 if( GL_FALSE == next_ins(pAsm) )
4776 {
4777 return GL_FALSE;
4778 }
4779
4780 // Now replicate result to all necessary channels in destination
4781 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4782
4783 if( GL_FALSE == assemble_dst(pAsm) )
4784 {
4785 return GL_FALSE;
4786 }
4787
4788 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4789 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4790 pAsm->S[0].src.reg = tmp;
4791
4792 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4793 noneg_PVSSRC(&(pAsm->S[0].src));
4794
4795 if( GL_FALSE == next_ins(pAsm) )
4796 {
4797 return GL_FALSE;
4798 }
4799
4800 return GL_TRUE;
4801 }
4802
4803 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4804 {
4805 if(8 == pAsm->unAsic)
4806 {
4807 return assemble_math_function(pAsm, EG_OP2_INST_RECIP_IEEE);
4808 }
4809
4810 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4811 }
4812
4813 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4814 {
4815 if(8 == pAsm->unAsic)
4816 {
4817 return assemble_math_function(pAsm, EG_OP2_INST_RECIPSQRT_IEEE);
4818 }
4819
4820 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4821 }
4822
4823 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4824 {
4825 BITS tmp;
4826
4827 checkop1(pAsm);
4828
4829 tmp = gethelpr(pAsm);
4830
4831 if(8 == pAsm->unAsic)
4832 {
4833 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4834 }
4835 else
4836 {
4837 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4838 }
4839 pAsm->D.dst.op3 = 1;
4840
4841 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4842 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4843 pAsm->D.dst.reg = tmp;
4844
4845 assemble_src(pAsm, 0, -1);
4846
4847 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4848 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4849
4850 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4851 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4852
4853 pAsm->D2.dst2.literal_slots = 1;
4854 pAsm->C[0].f = 1/(3.1415926535 * 2);
4855 pAsm->C[1].f = 0.5F;
4856
4857 if ( GL_FALSE == next_ins(pAsm) )
4858 {
4859 return GL_FALSE;
4860 }
4861
4862 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
4863
4864 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4865 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4866 pAsm->D.dst.reg = tmp;
4867 pAsm->D.dst.writex = 1;
4868
4869 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4870 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4871 pAsm->S[0].src.reg = tmp;
4872 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4873
4874 if(( GL_FALSE == next_ins(pAsm) ))
4875 {
4876 return GL_FALSE;
4877 }
4878 if(8 == pAsm->unAsic)
4879 {
4880 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
4881 }
4882 else
4883 {
4884 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4885 }
4886 pAsm->D.dst.op3 = 1;
4887
4888 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4889 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4890 pAsm->D.dst.reg = tmp;
4891
4892 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4893 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4894 pAsm->S[0].src.reg = tmp;
4895 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4896
4897 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4898 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4899
4900 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4901 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_Y);
4902
4903 pAsm->D2.dst2.literal_slots = 1;
4904
4905 if(pAsm->bR6xx) {
4906 pAsm->C[0].f = 3.1415926535897f * 2.0f;
4907 pAsm->C[1].f = -3.1415926535897f;
4908 } else {
4909 pAsm->C[0].f = 1.0f;
4910 pAsm->C[1].f = -0.5f;
4911 }
4912
4913 if(( GL_FALSE == next_ins(pAsm) ))
4914 {
4915 return GL_FALSE;
4916 }
4917
4918 // COS dst.x, a.x
4919 if(8 == pAsm->unAsic)
4920 {
4921 pAsm->D.dst.opcode = EG_OP2_INST_COS;
4922 }
4923 else
4924 {
4925 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4926 }
4927 pAsm->D.dst.math = 1;
4928
4929 assemble_dst(pAsm);
4930 /* mask y */
4931 pAsm->D.dst.writey = 0;
4932
4933 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4934 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4935 pAsm->S[0].src.reg = tmp;
4936 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4937 noneg_PVSSRC(&(pAsm->S[0].src));
4938
4939 if ( GL_FALSE == next_ins(pAsm) )
4940 {
4941 return GL_FALSE;
4942 }
4943
4944 // SIN dst.y, a.x
4945 if(8 == pAsm->unAsic)
4946 {
4947 pAsm->D.dst.opcode = EG_OP2_INST_SIN;
4948 }
4949 else
4950 {
4951 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4952 }
4953 pAsm->D.dst.math = 1;
4954
4955 assemble_dst(pAsm);
4956 /* mask x */
4957 pAsm->D.dst.writex = 0;
4958
4959 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4960 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4961 pAsm->S[0].src.reg = tmp;
4962 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4963 noneg_PVSSRC(&(pAsm->S[0].src));
4964
4965 if( GL_FALSE == next_ins(pAsm) )
4966 {
4967 return GL_FALSE;
4968 }
4969
4970 return GL_TRUE;
4971 }
4972
4973 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4974 {
4975 if( GL_FALSE == checkop2(pAsm) )
4976 {
4977 return GL_FALSE;
4978 }
4979
4980 pAsm->D.dst.opcode = opcode;
4981 //pAsm->D.dst.math = 1;
4982
4983 if( GL_FALSE == assemble_dst(pAsm) )
4984 {
4985 return GL_FALSE;
4986 }
4987
4988 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4989 {
4990 return GL_FALSE;
4991 }
4992
4993 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4994 {
4995 return GL_FALSE;
4996 }
4997
4998 if( GL_FALSE == next_ins(pAsm) )
4999 {
5000 return GL_FALSE;
5001 }
5002
5003 return GL_TRUE;
5004 }
5005
5006 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
5007 {
5008 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
5009
5010 pAsm->D.dst.opcode = opcode;
5011 pAsm->D.dst.math = 1;
5012 pAsm->D.dst.predicated = 1;
5013
5014 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5015 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5016 pAsm->D.dst.reg = pAsm->uHelpReg;
5017 pAsm->D.dst.writex = 1;
5018 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
5019
5020 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5021 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5022 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
5023 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
5024 noneg_PVSSRC(&(pAsm->S[0].src));
5025
5026 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5027 pAsm->S[1].src.reg = pAsm->uHelpReg;
5028 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5029 noneg_PVSSRC(&(pAsm->S[1].src));
5030 pAsm->S[1].src.swizzlex = SQ_SEL_0;
5031 pAsm->S[1].src.swizzley = SQ_SEL_0;
5032 pAsm->S[1].src.swizzlez = SQ_SEL_0;
5033 pAsm->S[1].src.swizzlew = SQ_SEL_0;
5034
5035 if( GL_FALSE == next_ins(pAsm) )
5036 {
5037 return GL_FALSE;
5038 }
5039
5040 return GL_TRUE;
5041 }
5042
5043 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
5044 {
5045 if( GL_FALSE == checkop2(pAsm) )
5046 {
5047 return GL_FALSE;
5048 }
5049
5050 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
5051
5052 if( GL_FALSE == assemble_dst(pAsm) )
5053 {
5054 return GL_FALSE;
5055 }
5056
5057 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5058 {
5059 return GL_FALSE;
5060 }
5061
5062 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5063 {
5064 return GL_FALSE;
5065 }
5066
5067 if( GL_FALSE == next_ins(pAsm) )
5068 {
5069 return GL_FALSE;
5070 }
5071
5072 return GL_TRUE;
5073 }
5074
5075 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
5076 {
5077 if( GL_FALSE == checkop2(pAsm) )
5078 {
5079 return GL_FALSE;
5080 }
5081
5082 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
5083
5084 if( GL_FALSE == assemble_dst(pAsm) )
5085 {
5086 return GL_FALSE;
5087 }
5088
5089 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
5090 {
5091 return GL_FALSE;
5092 }
5093
5094 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
5095 {
5096 return GL_FALSE;
5097 }
5098
5099 if( GL_FALSE == next_ins(pAsm) )
5100 {
5101 return GL_FALSE;
5102 }
5103
5104 return GL_TRUE;
5105 }
5106
5107 GLboolean assemble_SSG(r700_AssemblerBase *pAsm)
5108 {
5109 checkop1(pAsm);
5110
5111 GLuint tmp = gethelpr(pAsm);
5112 /* tmp = (src > 0 ? 1 : src) */
5113 if(8 == pAsm->unAsic)
5114 {
5115 pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
5116 }
5117 else
5118 {
5119 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5120 }
5121 pAsm->D.dst.op3 = 1;
5122 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5123 pAsm->D.dst.reg = tmp;
5124
5125 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5126 {
5127 return GL_FALSE;
5128 }
5129
5130 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5131
5132 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
5133 {
5134 return GL_FALSE;
5135 }
5136
5137 if( GL_FALSE == next_ins(pAsm) )
5138 {
5139 return GL_FALSE;
5140 }
5141
5142 /* dst = (-tmp > 0 ? -1 : tmp) */
5143 if(8 == pAsm->unAsic)
5144 {
5145 pAsm->D.dst.opcode = EG_OP3_INST_CNDGT;
5146 }
5147 else
5148 {
5149 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGT;
5150 }
5151 pAsm->D.dst.op3 = 1;
5152
5153 if( GL_FALSE == assemble_dst(pAsm) )
5154 {
5155 return GL_FALSE;
5156 }
5157
5158 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5159 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5160 pAsm->S[0].src.reg = tmp;
5161 noswizzle_PVSSRC(&(pAsm->S[0].src));
5162 neg_PVSSRC(&(pAsm->S[0].src));
5163
5164 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
5165 neg_PVSSRC(&(pAsm->S[1].src));
5166
5167 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5168 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5169 pAsm->S[2].src.reg = tmp;
5170 noswizzle_PVSSRC(&(pAsm->S[2].src));
5171
5172 if( GL_FALSE == next_ins(pAsm) )
5173 {
5174 return GL_FALSE;
5175 }
5176
5177 return GL_TRUE;
5178 }
5179
5180 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
5181 {
5182 return GL_TRUE;
5183 }
5184
5185 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
5186 {
5187 GLboolean src_const;
5188 GLboolean need_barrier = GL_FALSE;
5189
5190 checkop1(pAsm);
5191
5192 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
5193 {
5194 case PROGRAM_UNIFORM:
5195 case PROGRAM_CONSTANT:
5196 case PROGRAM_LOCAL_PARAM:
5197 case PROGRAM_ENV_PARAM:
5198 case PROGRAM_STATE_VAR:
5199 src_const = GL_TRUE;
5200 break;
5201 case PROGRAM_TEMPORARY:
5202 case PROGRAM_INPUT:
5203 default:
5204 src_const = GL_FALSE;
5205 break;
5206 }
5207
5208 if (GL_TRUE == src_const)
5209 {
5210 if ( GL_FALSE == mov_temp(pAsm, 0) )
5211 return GL_FALSE;
5212 need_barrier = GL_TRUE;
5213 }
5214
5215 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5216 {
5217 GLuint tmp = gethelpr(pAsm);
5218 if(8 == pAsm->unAsic)
5219 {
5220 pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5221 }
5222 else
5223 {
5224 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5225 }
5226 pAsm->D.dst.math = 1;
5227 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5228 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5229 pAsm->D.dst.reg = tmp;
5230 pAsm->D.dst.writew = 1;
5231
5232 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5233 {
5234 return GL_FALSE;
5235 }
5236 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
5237 if( GL_FALSE == next_ins(pAsm) )
5238 {
5239 return GL_FALSE;
5240 }
5241
5242 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5243 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5244 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5245 pAsm->D.dst.reg = tmp;
5246 pAsm->D.dst.writex = 1;
5247 pAsm->D.dst.writey = 1;
5248 pAsm->D.dst.writez = 1;
5249 pAsm->D.dst.writew = 0;
5250
5251 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5252 {
5253 return GL_FALSE;
5254 }
5255 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5256 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5257 pAsm->S[1].src.reg = tmp;
5258 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
5259
5260 if( GL_FALSE == next_ins(pAsm) )
5261 {
5262 return GL_FALSE;
5263 }
5264
5265 pAsm->aArgSubst[1] = tmp;
5266 need_barrier = GL_TRUE;
5267 }
5268
5269 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
5270 {
5271 GLuint tmp1 = gethelpr(pAsm);
5272 GLuint tmp2 = gethelpr(pAsm);
5273
5274 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
5275 if(8 == pAsm->unAsic)
5276 {
5277 pAsm->D.dst.opcode = EG_OP2_INST_CUBE;
5278 }
5279 else
5280 {
5281 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
5282 }
5283 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5284 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5285 pAsm->D.dst.reg = tmp1;
5286 nomask_PVSDST(&(pAsm->D.dst));
5287
5288 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5289 {
5290 return GL_FALSE;
5291 }
5292
5293 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
5294 {
5295 return GL_FALSE;
5296 }
5297
5298 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
5299 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
5300
5301 if( GL_FALSE == next_ins(pAsm) )
5302 {
5303 return GL_FALSE;
5304 }
5305
5306 /* tmp1.z = RCP_e(|tmp1.z|) */
5307 if(8 == pAsm->unAsic)
5308 {
5309 pAsm->D.dst.opcode = EG_OP2_INST_RECIP_IEEE;
5310 }
5311 else
5312 {
5313 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
5314 }
5315 pAsm->D.dst.math = 1;
5316 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5317 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5318 pAsm->D.dst.reg = tmp1;
5319 pAsm->D.dst.writez = 1;
5320
5321 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5322 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5323 pAsm->S[0].src.reg = tmp1;
5324 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
5325 pAsm->S[0].src.abs = 1;
5326
5327 next_ins(pAsm);
5328
5329 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
5330 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
5331 * muladd has no writemask, have to use another temp
5332 */
5333 if(8 == pAsm->unAsic)
5334 {
5335 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5336 }
5337 else
5338 {
5339 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5340 }
5341 pAsm->D.dst.op3 = 1;
5342 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5343 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5344 pAsm->D.dst.reg = tmp2;
5345
5346 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5347 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5348 pAsm->S[0].src.reg = tmp1;
5349 noswizzle_PVSSRC(&(pAsm->S[0].src));
5350 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5351 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5352 pAsm->S[1].src.reg = tmp1;
5353 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
5354 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
5355 /* immediate c 1.5 */
5356 pAsm->D2.dst2.literal_slots = 1;
5357 pAsm->C[0].f = 1.5F;
5358 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
5359 pAsm->S[2].src.reg = tmp1;
5360 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
5361
5362 next_ins(pAsm);
5363
5364 /* tmp1.xy = temp2.xy */
5365 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5366 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5367 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5368 pAsm->D.dst.reg = tmp1;
5369 pAsm->D.dst.writex = 1;
5370 pAsm->D.dst.writey = 1;
5371 pAsm->D.dst.writez = 0;
5372 pAsm->D.dst.writew = 0;
5373
5374 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5375 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5376 pAsm->S[0].src.reg = tmp2;
5377 noswizzle_PVSSRC(&(pAsm->S[0].src));
5378
5379 next_ins(pAsm);
5380 pAsm->aArgSubst[1] = tmp1;
5381 need_barrier = GL_TRUE;
5382
5383 }
5384
5385 switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
5386 {
5387 case OPCODE_DDX:
5388 /* will these need WQM(1) on CF inst ? */
5389 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
5390 break;
5391 case OPCODE_DDY:
5392 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
5393 break;
5394 case OPCODE_TXB:
5395 /* this should actually be SAMPLE_LB but that needs bias to be
5396 * embedded in the instruction - cant do here */
5397 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5398 break;
5399 case OPCODE_TXL:
5400 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
5401 break;
5402 default:
5403 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5404 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
5405 else
5406 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
5407 }
5408
5409 pAsm->is_tex = GL_TRUE;
5410 if ( GL_TRUE == need_barrier )
5411
5412 pAsm->is_tex = GL_TRUE;
5413 if ( GL_TRUE == need_barrier )
5414 {
5415 pAsm->need_tex_barrier = GL_TRUE;
5416 }
5417 // Set src1 to tex unit id
5418 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5419 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
5420
5421 //No sw info from mesa compiler, so hard code here.
5422 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5423 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5424 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5425 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5426
5427 if( GL_FALSE == tex_dst(pAsm) )
5428 {
5429 return GL_FALSE;
5430 }
5431
5432 if( GL_FALSE == tex_src(pAsm) )
5433 {
5434 return GL_FALSE;
5435 }
5436
5437 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
5438 {
5439 /* hopefully did swizzles before */
5440 noswizzle_PVSSRC(&(pAsm->S[0].src));
5441 }
5442
5443 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
5444 {
5445 /* SAMPLE dst, tmp.yxwy, CUBE */
5446 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
5447 pAsm->S[0].src.swizzley = SQ_SEL_X;
5448 pAsm->S[0].src.swizzlez = SQ_SEL_W;
5449 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
5450 }
5451
5452 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5453 {
5454 /* compare value goes to w chan ? */
5455 pAsm->S[0].src.swizzlew = SQ_SEL_Z;
5456 }
5457
5458 if ( GL_FALSE == next_ins(pAsm) )
5459 {
5460 return GL_FALSE;
5461 }
5462
5463 /* add ARB shadow ambient but clamp to 0..1 */
5464 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
5465 {
5466 /* ADD_SAT dst, dst, ambient[texunit] */
5467 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
5468
5469 if( GL_FALSE == assemble_dst(pAsm) )
5470 {
5471 return GL_FALSE;
5472 }
5473 pAsm->D2.dst2.SaturateMode = 1;
5474
5475 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5476 pAsm->S[0].src.reg = pAsm->D.dst.reg;
5477 noswizzle_PVSSRC(&(pAsm->S[0].src));
5478 noneg_PVSSRC(&(pAsm->S[0].src));
5479
5480 pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
5481 pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
5482 noswizzle_PVSSRC(&(pAsm->S[1].src));
5483 noneg_PVSSRC(&(pAsm->S[1].src));
5484
5485 if( GL_FALSE == next_ins(pAsm) )
5486 {
5487 return GL_FALSE;
5488 }
5489
5490 }
5491
5492 return GL_TRUE;
5493 }
5494
5495 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
5496 {
5497 BITS tmp1;
5498 BITS tmp2 = 0;
5499
5500 if( GL_FALSE == checkop2(pAsm) )
5501 {
5502 return GL_FALSE;
5503 }
5504
5505 tmp1 = gethelpr(pAsm);
5506
5507 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
5508
5509 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5510 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5511 pAsm->D.dst.reg = tmp1;
5512 nomask_PVSDST(&(pAsm->D.dst));
5513
5514 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5515 {
5516 return GL_FALSE;
5517 }
5518
5519 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5520 {
5521 return GL_FALSE;
5522 }
5523
5524 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5525 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5526
5527 if( GL_FALSE == next_ins(pAsm) )
5528 {
5529 return GL_FALSE;
5530 }
5531
5532 if(8 == pAsm->unAsic)
5533 {
5534 pAsm->D.dst.opcode = EG_OP3_INST_MULADD;
5535 }
5536 else
5537 {
5538 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
5539 }
5540 pAsm->D.dst.op3 = 1;
5541
5542 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5543 {
5544 tmp2 = gethelpr(pAsm);
5545
5546 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
5547 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5548 pAsm->D.dst.reg = tmp2;
5549
5550 nomask_PVSDST(&(pAsm->D.dst));
5551 }
5552 else
5553 {
5554 if( GL_FALSE == assemble_dst(pAsm) )
5555 {
5556 return GL_FALSE;
5557 }
5558 }
5559
5560 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
5561 {
5562 return GL_FALSE;
5563 }
5564
5565 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
5566 {
5567 return GL_FALSE;
5568 }
5569
5570 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
5571 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
5572
5573 // result1 + (neg) result0
5574 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
5575 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
5576 pAsm->S[2].src.reg = tmp1;
5577
5578 neg_PVSSRC(&(pAsm->S[2].src));
5579 noswizzle_PVSSRC(&(pAsm->S[2].src));
5580
5581 if( GL_FALSE == next_ins(pAsm) )
5582 {
5583 return GL_FALSE;
5584 }
5585
5586
5587 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
5588 {
5589 if( GL_FALSE == assemble_dst(pAsm) )
5590 {
5591 return GL_FALSE;
5592 }
5593
5594 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5595
5596 // Use tmp as source
5597 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5598 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
5599 pAsm->S[0].src.reg = tmp2;
5600
5601 noneg_PVSSRC(&(pAsm->S[0].src));
5602 noswizzle_PVSSRC(&(pAsm->S[0].src));
5603
5604 if( GL_FALSE == next_ins(pAsm) )
5605 {
5606 return GL_FALSE;
5607 }
5608 }
5609
5610 return GL_TRUE;
5611 }
5612
5613 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
5614 {
5615 return GL_TRUE;
5616 }
5617
5618 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
5619 {
5620 switch (uReason)
5621 {
5622 case FC_PUSH_VPM:
5623 pAsm->CALLSTACK[pAsm->CALLSP].current--;
5624 break;
5625 case FC_PUSH_WQM:
5626 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5627 break;
5628 case FC_LOOP:
5629 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
5630 break;
5631 case FC_REP:
5632 /* TODO : for 16 vp asic, should -= 2; */
5633 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
5634 break;
5635 };
5636 }
5637
5638 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
5639 {
5640 if(GL_TRUE == bCheckMaxOnly)
5641 {
5642 switch (uReason)
5643 {
5644 case FC_PUSH_VPM:
5645 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
5646 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5647 {
5648 pAsm->CALLSTACK[pAsm->CALLSP].max =
5649 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
5650 }
5651 break;
5652 case FC_PUSH_WQM:
5653 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
5654 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5655 {
5656 pAsm->CALLSTACK[pAsm->CALLSP].max =
5657 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
5658 }
5659 break;
5660 }
5661 return;
5662 }
5663
5664 switch (uReason)
5665 {
5666 case FC_PUSH_VPM:
5667 pAsm->CALLSTACK[pAsm->CALLSP].current++;
5668 break;
5669 case FC_PUSH_WQM:
5670 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5671 break;
5672 case FC_LOOP:
5673 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5674 break;
5675 case FC_REP:
5676 /* TODO : for 16 vp asic, should += 2; */
5677 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
5678 break;
5679 };
5680
5681 if(pAsm->CALLSTACK[pAsm->CALLSP].current
5682 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5683 {
5684 pAsm->CALLSTACK[pAsm->CALLSP].max =
5685 pAsm->CALLSTACK[pAsm->CALLSP].current;
5686 }
5687 }
5688
5689 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
5690 {
5691 if(GL_FALSE == add_cf_instruction(pAsm) )
5692 {
5693 return GL_FALSE;
5694 }
5695
5696 if(8 == pAsm->unAsic)
5697 {
5698 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5699 EG_CF_INST_JUMP,
5700 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5701 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5702 pops,
5703 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5704 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5705 0,
5706 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5707 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5708 SQ_CF_COND_ACTIVE,
5709 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5710 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5711 0,
5712 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5713 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5714 0,
5715 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5716 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5717 0,
5718 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5719 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5720 0,
5721 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5722 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5723 1,
5724 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5725 }
5726 else
5727 {
5728 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5729 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5730 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5731
5732 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5733 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5734 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5735 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5736
5737 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5738 }
5739
5740 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
5741
5742 return GL_TRUE;
5743 }
5744
5745 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5746 {
5747 if(GL_FALSE == add_cf_instruction(pAsm) )
5748 {
5749 return GL_FALSE;
5750 }
5751
5752 if(8 == pAsm->unAsic)
5753 {
5754 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5755 EG_CF_INST_POP,
5756 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5757 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5758 pops,
5759 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5760 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5761 0,
5762 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5763 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5764 SQ_CF_COND_ACTIVE,
5765 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5766 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5767 0,
5768 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5769 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5770 0,
5771 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5772 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5773 0,
5774 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5775 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5776 1,
5777 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5778 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5779 0,
5780 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5781 }
5782 else
5783 {
5784 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5785 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5786 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5787
5788 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5789 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5790 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5791
5792 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5793
5794 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5795 }
5796 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5797
5798 return GL_TRUE;
5799 }
5800
5801 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
5802 {
5803 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5804
5805 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
5806
5807
5808 if(GL_FALSE == add_cf_instruction(pAsm) )
5809 {
5810 return GL_FALSE;
5811 }
5812
5813 if(8 == pAsm->unAsic)
5814 {
5815 if(GL_TRUE != bHasElse)
5816 {
5817 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5818 1,
5819 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5820 }
5821 else
5822 {
5823 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5824 0,
5825 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5826 }
5827
5828 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5829 EG_CF_INST_JUMP,
5830 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5831 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5832 0,
5833 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5834 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5835 SQ_CF_COND_ACTIVE,
5836 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5837 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5838 0,
5839 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5840 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5841 0,
5842 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5843 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5844 0,
5845 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5846 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5847 1,
5848 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5849 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5850 0,
5851 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5852 }
5853 else
5854 {
5855 if(GL_TRUE != bHasElse)
5856 {
5857 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5858 }
5859 else
5860 {
5861 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5862 }
5863 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5864 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5865
5866 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5867 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5868 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5869 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5870
5871 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5872 }
5873
5874 pAsm->FCSP++;
5875 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
5876 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5877 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5878 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5879
5880 #ifndef USE_CF_FOR_POP_AFTER
5881 if(GL_TRUE != bHasElse)
5882 {
5883 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5884 }
5885 #endif /* USE_CF_FOR_POP_AFTER */
5886
5887 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
5888
5889 return GL_TRUE;
5890 }
5891
5892 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5893 {
5894 if(GL_FALSE == add_cf_instruction(pAsm) )
5895 {
5896 return GL_FALSE;
5897 }
5898
5899 if(8 == pAsm->unAsic)
5900 {
5901 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5902 1,
5903 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
5904 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5905 EG_CF_INST_ELSE,
5906 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
5907 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5908 0,
5909 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
5910 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5911 SQ_CF_COND_ACTIVE,
5912 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
5913 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5914 0,
5915 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
5916 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5917 0,
5918 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
5919 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5920 0,
5921 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
5922 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5923 1,
5924 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
5925 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
5926 0,
5927 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
5928 }
5929 else
5930 {
5931 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
5932 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5933 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5934
5935 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5936 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5937 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
5938 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5939
5940 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5941 }
5942
5943 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5944 0,
5945 sizeof(R700ControlFlowGenericClause *) );
5946 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5947 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5948
5949 #ifndef USE_CF_FOR_POP_AFTER
5950 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5951 #endif /* USE_CF_FOR_POP_AFTER */
5952
5953 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
5954
5955 return GL_TRUE;
5956 }
5957
5958 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5959 {
5960 #ifdef USE_CF_FOR_POP_AFTER
5961 pops(pAsm, 1);
5962 #endif /* USE_CF_FOR_POP_AFTER */
5963
5964 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5965
5966 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5967 {
5968 /* no else in between */
5969 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5970 }
5971 else
5972 {
5973 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5974 }
5975
5976 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5977 {
5978 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5979 }
5980
5981 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5982 {
5983 radeon_error("if/endif in shader code are not paired. \n");
5984 return GL_FALSE;
5985 }
5986
5987 pAsm->FCSP--;
5988
5989 decreaseCurrent(pAsm, FC_PUSH_VPM);
5990
5991 return GL_TRUE;
5992 }
5993
5994 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5995 {
5996 if(GL_FALSE == add_cf_instruction(pAsm) )
5997 {
5998 return GL_FALSE;
5999 }
6000
6001 if(8 == pAsm->unAsic)
6002 {
6003 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6004 0,
6005 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6006 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6007 EG_CF_INST_LOOP_START_NO_AL,
6008 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6009 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6010 0,
6011 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6012 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6013 SQ_CF_COND_ACTIVE,
6014 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6015 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6016 0,
6017 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6018 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6019 0,
6020 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6021 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6022 0,
6023 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6024 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6025 1,
6026 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6027 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6028 0,
6029 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6030 }
6031 else
6032 {
6033 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6034 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6035 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6036
6037 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6038 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6039 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
6040 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6041
6042 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6043 }
6044
6045 pAsm->FCSP++;
6046 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
6047 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
6048 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
6049 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
6050 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
6051
6052 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
6053
6054 return GL_TRUE;
6055 }
6056
6057 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
6058 {
6059 #ifdef USE_CF_FOR_CONTINUE_BREAK
6060
6061 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6062
6063 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
6064
6065 unsigned int unFCSP;
6066 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6067 {
6068 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6069 {
6070 break;
6071 }
6072 }
6073 if(0 == FC_LOOP)
6074 {
6075 radeon_error("Break is not inside loop/endloop pair.\n");
6076 return GL_FALSE;
6077 }
6078
6079 if(GL_FALSE == add_cf_instruction(pAsm) )
6080 {
6081 return GL_FALSE;
6082 }
6083
6084 if(8 == pAsm->unAsic)
6085 {
6086 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6087 1,
6088 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6089 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6090 EG_CF_INST_LOOP_BREAK,
6091 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6092 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6093 0,
6094 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6095 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6096 SQ_CF_COND_ACTIVE,
6097 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6098 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6099 0,
6100 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6101 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6102 0,
6103 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6104 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6105 0,
6106 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6107 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6108 1,
6109 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6110 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6111 0,
6112 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6113 }
6114 else
6115 {
6116 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6117 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6118 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6119
6120 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6121 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6122 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
6123
6124 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6125
6126 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6127 }
6128
6129 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6130 (void *)pAsm->fc_stack[unFCSP].mid,
6131 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6132 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6133 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6134 pAsm->fc_stack[unFCSP].unNumMid++;
6135
6136 if(GL_FALSE == add_cf_instruction(pAsm) )
6137 {
6138 return GL_FALSE;
6139 }
6140
6141 if(8 == pAsm->unAsic)
6142 {
6143 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6144 1,
6145 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6146 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6147 EG_CF_INST_POP,
6148 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6149 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6150 0,
6151 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6152 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6153 SQ_CF_COND_ACTIVE,
6154 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6155 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6156 0,
6157 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6158 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6159 0,
6160 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6161 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6162 0,
6163 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6164 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6165 1,
6166 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6167 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6168 0,
6169 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6170 }
6171 else
6172 {
6173 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6174 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6175 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6176
6177 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6178 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6179 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
6180
6181 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6182
6183 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6184 }
6185
6186 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6187
6188 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6189
6190 #endif //USE_CF_FOR_CONTINUE_BREAK
6191 return GL_TRUE;
6192 }
6193
6194 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
6195 {
6196 #ifdef USE_CF_FOR_CONTINUE_BREAK
6197 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6198
6199 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
6200
6201 unsigned int unFCSP;
6202 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
6203 {
6204 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6205 {
6206 break;
6207 }
6208 }
6209 if(0 == FC_LOOP)
6210 {
6211 radeon_error("Continue is not inside loop/endloop pair.\n");
6212 return GL_FALSE;
6213 }
6214
6215 if(GL_FALSE == add_cf_instruction(pAsm) )
6216 {
6217 return GL_FALSE;
6218 }
6219
6220 if(8 == pAsm->unAsic)
6221 {
6222 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6223 1,
6224 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6225 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6226 EG_CF_INST_LOOP_CONTINUE,
6227 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6228 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6229 0,
6230 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6231 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6232 SQ_CF_COND_ACTIVE,
6233 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6234 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6235 0,
6236 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6237 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6238 0,
6239 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6240 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6241 0,
6242 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6243 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6244 1,
6245 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6246 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6247 0,
6248 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6249 }
6250 else
6251 {
6252 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6253 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6254 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6255
6256 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6257 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6258 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
6259
6260 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6261
6262 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6263 }
6264
6265 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6266 (void *)pAsm->fc_stack[unFCSP].mid,
6267 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6268 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6269 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6270 pAsm->fc_stack[unFCSP].unNumMid++;
6271
6272 if(GL_FALSE == add_cf_instruction(pAsm) )
6273 {
6274 return GL_FALSE;
6275 }
6276
6277 if(8 == pAsm->unAsic)
6278 {
6279 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6280 1,
6281 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6282 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6283 EG_CF_INST_POP,
6284 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6285 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6286 0,
6287 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6288 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6289 SQ_CF_COND_ACTIVE,
6290 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6291 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6292 0,
6293 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6294 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6295 0,
6296 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6297 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6298 0,
6299 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6300 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6301 1,
6302 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6303 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6304 0,
6305 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6306 }
6307 else
6308 {
6309 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6310 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6311 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6312
6313 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6314 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6315 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
6316
6317 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6318
6319 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6320 }
6321
6322 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6323
6324 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6325
6326 #endif /* USE_CF_FOR_CONTINUE_BREAK */
6327
6328 return GL_TRUE;
6329 }
6330
6331 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
6332 {
6333 GLuint i;
6334
6335 if(GL_FALSE == add_cf_instruction(pAsm) )
6336 {
6337 return GL_FALSE;
6338 }
6339
6340 if(8 == pAsm->unAsic)
6341 {
6342 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6343 0,
6344 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6345 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6346 EG_CF_INST_LOOP_END,
6347 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6348 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6349 0,
6350 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6351 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6352 SQ_CF_COND_ACTIVE,
6353 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6354 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6355 0,
6356 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6357 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6358 0,
6359 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6360 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6361 0,
6362 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6363 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6364 1,
6365 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6366 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6367 0,
6368 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6369 }
6370 else
6371 {
6372 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6373 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6374 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6375
6376 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6377 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6378 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
6379 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6380
6381 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6382 }
6383
6384 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
6385 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
6386
6387 #ifdef USE_CF_FOR_CONTINUE_BREAK
6388 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
6389 {
6390 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
6391 }
6392 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
6393 {
6394 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
6395 }
6396 #endif
6397
6398 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
6399 {
6400 radeon_error("loop/endloop in shader code are not paired. \n");
6401 return GL_FALSE;
6402 }
6403
6404 GLuint unFCSP;
6405 GLuint unIF = 0;
6406 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
6407 {
6408 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6409 {
6410 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6411 {
6412 breakLoopOnFlag(pAsm, unFCSP);
6413 break;
6414 }
6415 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6416 {
6417 unIF++;
6418 }
6419 }
6420 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
6421 {
6422 #ifdef USE_CF_FOR_POP_AFTER
6423 returnOnFlag(pAsm, unIF);
6424 #else
6425 returnOnFlag(pAsm, 0);
6426 #endif /* USE_CF_FOR_POP_AFTER */
6427 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
6428 }
6429 }
6430
6431 pAsm->FCSP--;
6432
6433 decreaseCurrent(pAsm, FC_LOOP);
6434
6435 return GL_TRUE;
6436 }
6437
6438 void add_return_inst(r700_AssemblerBase *pAsm)
6439 {
6440 if(GL_FALSE == add_cf_instruction(pAsm) )
6441 {
6442 return;
6443 }
6444
6445 if(8 == pAsm->unAsic)
6446 {
6447 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6448 0,
6449 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6450 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6451 EG_CF_INST_RETURN,
6452 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6453 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6454 0,
6455 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6456 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6457 SQ_CF_COND_ACTIVE,
6458 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6459 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6460 0,
6461 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6462 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6463 0,
6464 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6465 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6466 0,
6467 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6468 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6469 1,
6470 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6471 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6472 0,
6473 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6474 }
6475 else
6476 {
6477 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6478 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6479 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6480 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6481
6482 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6483 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6484 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
6485 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6486
6487 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6488 }
6489 }
6490
6491 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
6492 {
6493 /* Put in sub */
6494 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
6495 {
6496 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
6497 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
6498 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
6499 if(NULL == pAsm->subs)
6500 {
6501 return GL_FALSE;
6502 }
6503 pAsm->unSubArraySize += 10;
6504 }
6505
6506 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
6507 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
6508 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
6509 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
6510
6511 pAsm->CALLSP++;
6512 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
6513 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
6514 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
6515 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
6516 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
6517 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
6518 SetActiveCFlist(pAsm->pR700Shader,
6519 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6520
6521 pAsm->unSubArrayPointer++;
6522
6523 /* start sub */
6524 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6525
6526 pAsm->FCSP++;
6527 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
6528
6529 checkStackDepth(pAsm, FC_REP, GL_FALSE);
6530
6531 return GL_TRUE;
6532 }
6533
6534 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
6535 {
6536 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
6537 {
6538 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
6539 return GL_FALSE;
6540 }
6541
6542 /* copy max to sub structure */
6543 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
6544 = pAsm->CALLSTACK[pAsm->CALLSP].max;
6545
6546 decreaseCurrent(pAsm, FC_REP);
6547
6548 pAsm->CALLSP--;
6549 SetActiveCFlist(pAsm->pR700Shader,
6550 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
6551
6552 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6553
6554 pAsm->FCSP--;
6555
6556 return GL_TRUE;
6557 }
6558
6559 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
6560 {
6561 GLuint unIF = 0;
6562
6563 if(pAsm->CALLSP > 0)
6564 { /* in sub */
6565 GLuint unFCSP;
6566 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
6567 {
6568 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
6569 {
6570 setRetInLoopFlag(pAsm, SQ_SEL_1);
6571 breakLoopOnFlag(pAsm, unFCSP);
6572 pAsm->unCFflags |= LOOPRET_FLAGS;
6573
6574 return GL_TRUE;
6575 }
6576 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
6577 {
6578 unIF++;
6579 }
6580 }
6581 }
6582
6583 #ifdef USE_CF_FOR_POP_AFTER
6584 if(unIF > 0)
6585 {
6586 pops(pAsm, unIF);
6587 }
6588 #endif /* USE_CF_FOR_POP_AFTER */
6589
6590 add_return_inst(pAsm);
6591
6592 return GL_TRUE;
6593 }
6594
6595 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
6596 GLint nILindex,
6597 GLuint uiIL_Shift,
6598 GLuint uiNumberInsts,
6599 struct prog_instruction *pILInst,
6600 PRESUB_DESC * pPresubDesc)
6601 {
6602 GLint uiIL_Offset;
6603
6604 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6605
6606 if(GL_FALSE == add_cf_instruction(pAsm) )
6607 {
6608 return GL_FALSE;
6609 }
6610
6611 if(8 == pAsm->unAsic)
6612 {
6613 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6614 0,
6615 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6616 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6617 EG_CF_INST_CALL,
6618 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6619 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6620 0,
6621 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6622 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6623 SQ_CF_COND_ACTIVE,
6624 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6625 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6626 0,
6627 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6628 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6629 0,
6630 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6631 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6632 0,
6633 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6634 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6635 1,
6636 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6637 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6638 1,
6639 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6640 }
6641 else
6642 {
6643 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
6644 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
6645 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6646 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6647
6648 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6649 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6650 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
6651 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6652
6653 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6654 }
6655
6656 /* Put in caller */
6657 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
6658 {
6659 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
6660 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
6661 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
6662 if(NULL == pAsm->callers)
6663 {
6664 return GL_FALSE;
6665 }
6666 pAsm->unCallerArraySize += 10;
6667 }
6668
6669 uiIL_Offset = nILindex + uiIL_Shift;
6670 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
6671 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
6672
6673 pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
6674 pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
6675
6676 pAsm->unCallerArrayPointer++;
6677
6678 int j;
6679 GLuint max;
6680 GLuint unSubID;
6681 GLboolean bRet;
6682 for(j=0; j<pAsm->unSubArrayPointer; j++)
6683 {
6684 if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
6685 { /* compiled before */
6686
6687 max = pAsm->subs[j].unStackDepthMax
6688 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6689 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6690 {
6691 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6692 }
6693
6694 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
6695 return GL_TRUE;
6696 }
6697 }
6698
6699 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
6700 unSubID = pAsm->unSubArrayPointer;
6701
6702 bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
6703
6704 if(GL_TRUE == bRet)
6705 {
6706 max = pAsm->subs[unSubID].unStackDepthMax
6707 + pAsm->CALLSTACK[pAsm->CALLSP].current;
6708 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
6709 {
6710 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
6711 }
6712
6713 pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
6714 }
6715
6716 return bRet;
6717 }
6718
6719 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
6720 {
6721 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6722
6723 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6724 pAsm->D.dst.op3 = 0;
6725 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6726 pAsm->D.dst.reg = pAsm->flag_reg_index;
6727 pAsm->D.dst.writex = 1;
6728 pAsm->D.dst.writey = 0;
6729 pAsm->D.dst.writez = 0;
6730 pAsm->D.dst.writew = 0;
6731 pAsm->D2.dst2.literal_slots = 1;
6732 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6733 pAsm->D.dst.predicated = 0;
6734 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
6735 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
6736 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
6737 #if 0
6738 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
6739 //pAsm->S[0].src.reg = 0;
6740 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6741 noneg_PVSSRC(&(pAsm->S[0].src));
6742 pAsm->S[0].src.swizzlex = SQ_SEL_X;
6743 pAsm->S[0].src.swizzley = SQ_SEL_Y;
6744 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6745 pAsm->S[0].src.swizzlew = SQ_SEL_W;
6746
6747 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
6748 {
6749 return GL_FALSE;
6750 }
6751 #else
6752 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6753 pAsm->S[0].src.reg = 0;
6754 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6755 noneg_PVSSRC(&(pAsm->S[0].src));
6756 pAsm->S[0].src.swizzlex = flagValue;
6757 pAsm->S[0].src.swizzley = flagValue;
6758 pAsm->S[0].src.swizzlez = flagValue;
6759 pAsm->S[0].src.swizzlew = flagValue;
6760
6761 if( GL_FALSE == next_ins(pAsm) )
6762 {
6763 return GL_FALSE;
6764 }
6765 #endif
6766
6767 return GL_TRUE;
6768 }
6769
6770 GLboolean testFlag(r700_AssemblerBase *pAsm)
6771 {
6772 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
6773
6774 //Test flag
6775 GLuint tmp = gethelpr(pAsm);
6776 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6777
6778 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
6779 pAsm->D.dst.math = 1;
6780 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6781 pAsm->D.dst.reg = tmp;
6782 pAsm->D.dst.writex = 1;
6783 pAsm->D.dst.writey = 0;
6784 pAsm->D.dst.writez = 0;
6785 pAsm->D.dst.writew = 0;
6786 pAsm->D2.dst2.literal_slots = 1;
6787 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
6788 pAsm->D.dst.predicated = 1;
6789 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
6790
6791 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6792 pAsm->S[0].src.reg = pAsm->flag_reg_index;
6793 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6794 noneg_PVSSRC(&(pAsm->S[0].src));
6795 pAsm->S[0].src.swizzlex = SQ_SEL_X;
6796 pAsm->S[0].src.swizzley = SQ_SEL_Y;
6797 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
6798 pAsm->S[0].src.swizzlew = SQ_SEL_W;
6799 #if 0
6800 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
6801 //pAsm->S[1].src.reg = 0;
6802 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6803 noneg_PVSSRC(&(pAsm->S[1].src));
6804 pAsm->S[1].src.swizzlex = SQ_SEL_X;
6805 pAsm->S[1].src.swizzley = SQ_SEL_Y;
6806 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
6807 pAsm->S[1].src.swizzlew = SQ_SEL_W;
6808
6809 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
6810 {
6811 return GL_FALSE;
6812 }
6813 #else
6814 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
6815 pAsm->S[1].src.reg = 0;
6816 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
6817 noneg_PVSSRC(&(pAsm->S[1].src));
6818 pAsm->S[1].src.swizzlex = SQ_SEL_1;
6819 pAsm->S[1].src.swizzley = SQ_SEL_1;
6820 pAsm->S[1].src.swizzlez = SQ_SEL_1;
6821 pAsm->S[1].src.swizzlew = SQ_SEL_1;
6822
6823 if( GL_FALSE == next_ins(pAsm) )
6824 {
6825 return GL_FALSE;
6826 }
6827 #endif
6828
6829 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
6830
6831 return GL_TRUE;
6832 }
6833
6834 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
6835 {
6836 testFlag(pAsm);
6837 jumpToOffest(pAsm, 1, 4);
6838 setRetInLoopFlag(pAsm, SQ_SEL_0);
6839 pops(pAsm, unIF + 1);
6840 add_return_inst(pAsm);
6841
6842 return GL_TRUE;
6843 }
6844
6845 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
6846 {
6847 testFlag(pAsm);
6848
6849 //break
6850 if(GL_FALSE == add_cf_instruction(pAsm) )
6851 {
6852 return GL_FALSE;
6853 }
6854
6855 if(8 == pAsm->unAsic)
6856 {
6857 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6858 1,
6859 EG_CF_WORD1__POP_COUNT_shift, EG_CF_WORD1__POP_COUNT_mask);
6860 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6861 EG_CF_INST_LOOP_BREAK,
6862 EG_CF_WORD1__CF_INST_shift, EG_CF_WORD1__CF_INST_mask);
6863 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6864 0,
6865 EG_CF_WORD1__CF_CONST_shift, EG_CF_WORD1__CF_CONST_mask);
6866 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6867 SQ_CF_COND_ACTIVE,
6868 EG_CF_WORD1__COND_shift, EG_CF_WORD1__COND_mask);
6869 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6870 0,
6871 EG_CF_WORD1__EOP_shift, EG_CF_WORD1__EOP_bit);
6872 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6873 0,
6874 EG_CF_WORD1__VPM_shift, EG_CF_WORD1__VPM_bit);
6875 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6876 0,
6877 EG_CF_WORD1__WQM_shift, EG_CF_WORD1__WQM_bit);
6878 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6879 1,
6880 EG_CF_WORD1__BARRIER_shift, EG_CF_WORD1__BARRIER_bit);
6881 SETfield(pAsm->cf_current_cf_clause_ptr->m_Word1.val,
6882 1,
6883 EG_CF_WORD1__COUNT_shift, EG_CF_WORD1__COUNT_mask);
6884 }
6885 else
6886 {
6887 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
6888 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
6889 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
6890
6891 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
6892 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6893 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
6894 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6895
6896 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
6897 }
6898
6899 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
6900 (void *)pAsm->fc_stack[unFCSP].mid,
6901 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
6902 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
6903 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
6904 pAsm->fc_stack[unFCSP].unNumMid++;
6905
6906 pops(pAsm, 1);
6907
6908 return GL_TRUE;
6909 }
6910
6911 GLboolean AssembleInstr(GLuint uiFirstInst,
6912 GLuint uiIL_Shift,
6913 GLuint uiNumberInsts,
6914 struct prog_instruction *pILInst,
6915 r700_AssemblerBase *pR700AsmCode)
6916 {
6917 GLuint i;
6918
6919 pR700AsmCode->pILInst = pILInst;
6920 for(i=uiFirstInst; i<uiNumberInsts; i++)
6921 {
6922 pR700AsmCode->uiCurInst = i;
6923
6924 #ifndef USE_CF_FOR_CONTINUE_BREAK
6925 if(OPCODE_BRK == pILInst[i+1].Opcode)
6926 {
6927 switch(pILInst[i].Opcode)
6928 {
6929 case OPCODE_SLE:
6930 pILInst[i].Opcode = OPCODE_SGT;
6931 break;
6932 case OPCODE_SLT:
6933 pILInst[i].Opcode = OPCODE_SGE;
6934 break;
6935 case OPCODE_SGE:
6936 pILInst[i].Opcode = OPCODE_SLT;
6937 break;
6938 case OPCODE_SGT:
6939 pILInst[i].Opcode = OPCODE_SLE;
6940 break;
6941 case OPCODE_SEQ:
6942 pILInst[i].Opcode = OPCODE_SNE;
6943 break;
6944 case OPCODE_SNE:
6945 pILInst[i].Opcode = OPCODE_SEQ;
6946 break;
6947 default:
6948 break;
6949 }
6950 }
6951 #endif
6952 if(pILInst[i].CondUpdate == 1)
6953 {
6954 /* remember dest register used for cond evaluation */
6955 /* XXX also handle PROGRAM_OUTPUT registers here? */
6956 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
6957 }
6958
6959 switch (pILInst[i].Opcode)
6960 {
6961 case OPCODE_ABS:
6962 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
6963 return GL_FALSE;
6964 break;
6965 case OPCODE_ADD:
6966 case OPCODE_SUB:
6967 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
6968 return GL_FALSE;
6969 break;
6970
6971 case OPCODE_ARL:
6972 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
6973 return GL_FALSE;
6974 break;
6975 case OPCODE_ARR:
6976 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
6977 //if ( GL_FALSE == assemble_BAD("ARR") )
6978 return GL_FALSE;
6979 break;
6980
6981 case OPCODE_CMP:
6982 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
6983 return GL_FALSE;
6984 break;
6985 case OPCODE_COS:
6986 if(8 == pR700AsmCode->unAsic)
6987 {
6988 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_COS) )
6989 return GL_FALSE;
6990 }
6991 else
6992 {
6993 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
6994 return GL_FALSE;
6995 }
6996 break;
6997
6998 case OPCODE_DP2:
6999 case OPCODE_DP3:
7000 case OPCODE_DP4:
7001 case OPCODE_DPH:
7002 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
7003 return GL_FALSE;
7004 break;
7005
7006 case OPCODE_DST:
7007 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
7008 return GL_FALSE;
7009 break;
7010
7011 case OPCODE_EX2:
7012 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
7013 return GL_FALSE;
7014 break;
7015 case OPCODE_EXP:
7016 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
7017 return GL_FALSE;
7018 break;
7019
7020 case OPCODE_FLR:
7021 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
7022 return GL_FALSE;
7023 break;
7024 //case OP_FLR_INT: ;
7025
7026 // if ( GL_FALSE == assemble_FLR_INT() )
7027 // return GL_FALSE;
7028 // break;
7029
7030 case OPCODE_FRC:
7031 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
7032 return GL_FALSE;
7033 break;
7034
7035 case OPCODE_KIL:
7036 case OPCODE_KIL_NV:
7037 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
7038 return GL_FALSE;
7039 break;
7040 case OPCODE_LG2:
7041 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
7042 return GL_FALSE;
7043 break;
7044 case OPCODE_LIT:
7045 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
7046 return GL_FALSE;
7047 break;
7048 case OPCODE_LRP:
7049 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
7050 return GL_FALSE;
7051 break;
7052 case OPCODE_LOG:
7053 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
7054 return GL_FALSE;
7055 break;
7056
7057 case OPCODE_MAD:
7058 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
7059 return GL_FALSE;
7060 break;
7061 case OPCODE_MAX:
7062 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
7063 return GL_FALSE;
7064 break;
7065 case OPCODE_MIN:
7066 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
7067 return GL_FALSE;
7068 break;
7069
7070 case OPCODE_MOV:
7071 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
7072 return GL_FALSE;
7073 break;
7074 case OPCODE_MUL:
7075 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
7076 return GL_FALSE;
7077 break;
7078
7079 case OPCODE_NOISE1:
7080 {
7081 callPreSub(pR700AsmCode,
7082 GLSL_NOISE1,
7083 &noise1_presub,
7084 pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
7085 1);
7086 radeon_error("noise1: not yet supported shader instruction\n");
7087 };
7088 break;
7089 case OPCODE_NOISE2:
7090 radeon_error("noise2: not yet supported shader instruction\n");
7091 break;
7092 case OPCODE_NOISE3:
7093 radeon_error("noise3: not yet supported shader instruction\n");
7094 break;
7095 case OPCODE_NOISE4:
7096 radeon_error("noise4: not yet supported shader instruction\n");
7097 break;
7098
7099 case OPCODE_POW:
7100 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
7101 return GL_FALSE;
7102 break;
7103 case OPCODE_RCP:
7104 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
7105 return GL_FALSE;
7106 break;
7107 case OPCODE_RSQ:
7108 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
7109 return GL_FALSE;
7110 break;
7111 case OPCODE_SIN:
7112 if(8 == pR700AsmCode->unAsic)
7113 {
7114 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, EG_OP2_INST_SIN) )
7115 return GL_FALSE;
7116 }
7117 else
7118 {
7119 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
7120 return GL_FALSE;
7121 }
7122 break;
7123 case OPCODE_SCS:
7124 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
7125 return GL_FALSE;
7126 break;
7127
7128 case OPCODE_SEQ:
7129 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
7130 {
7131 return GL_FALSE;
7132 }
7133 break;
7134
7135 case OPCODE_SGT:
7136 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
7137 {
7138 return GL_FALSE;
7139 }
7140 break;
7141
7142 case OPCODE_SGE:
7143 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
7144 {
7145 return GL_FALSE;
7146 }
7147 break;
7148
7149 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
7150 case OPCODE_SLT:
7151 {
7152 struct prog_src_register SrcRegSave[2];
7153 SrcRegSave[0] = pILInst[i].SrcReg[0];
7154 SrcRegSave[1] = pILInst[i].SrcReg[1];
7155 pILInst[i].SrcReg[0] = SrcRegSave[1];
7156 pILInst[i].SrcReg[1] = SrcRegSave[0];
7157 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
7158 {
7159 pILInst[i].SrcReg[0] = SrcRegSave[0];
7160 pILInst[i].SrcReg[1] = SrcRegSave[1];
7161 return GL_FALSE;
7162 }
7163 pILInst[i].SrcReg[0] = SrcRegSave[0];
7164 pILInst[i].SrcReg[1] = SrcRegSave[1];
7165 }
7166 break;
7167
7168 case OPCODE_SLE:
7169 {
7170 struct prog_src_register SrcRegSave[2];
7171 SrcRegSave[0] = pILInst[i].SrcReg[0];
7172 SrcRegSave[1] = pILInst[i].SrcReg[1];
7173 pILInst[i].SrcReg[0] = SrcRegSave[1];
7174 pILInst[i].SrcReg[1] = SrcRegSave[0];
7175 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
7176 {
7177 pILInst[i].SrcReg[0] = SrcRegSave[0];
7178 pILInst[i].SrcReg[1] = SrcRegSave[1];
7179 return GL_FALSE;
7180 }
7181 pILInst[i].SrcReg[0] = SrcRegSave[0];
7182 pILInst[i].SrcReg[1] = SrcRegSave[1];
7183 }
7184 break;
7185
7186 case OPCODE_SNE:
7187 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
7188 {
7189 return GL_FALSE;
7190 }
7191 break;
7192
7193 //case OP_STP:
7194 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
7195 // return GL_FALSE;
7196 // break;
7197
7198 case OPCODE_SSG:
7199 if ( GL_FALSE == assemble_SSG(pR700AsmCode) )
7200 {
7201 return GL_FALSE;
7202 }
7203 break;
7204
7205 case OPCODE_SWZ:
7206 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
7207 {
7208 return GL_FALSE;
7209 }
7210 else
7211 {
7212 if( (i+1)<uiNumberInsts )
7213 {
7214 if(OPCODE_END != pILInst[i+1].Opcode)
7215 {
7216 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
7217 {
7218 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
7219 }
7220 }
7221 }
7222 }
7223 break;
7224 case OPCODE_DDX:
7225 case OPCODE_DDY:
7226 case OPCODE_TEX:
7227 case OPCODE_TXB:
7228 case OPCODE_TXL:
7229 case OPCODE_TXP:
7230 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
7231 return GL_FALSE;
7232 break;
7233
7234 case OPCODE_TRUNC:
7235 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
7236 return GL_FALSE;
7237 break;
7238
7239 case OPCODE_XPD:
7240 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
7241 return GL_FALSE;
7242 break;
7243
7244 case OPCODE_IF:
7245 {
7246 GLboolean bHasElse = GL_FALSE;
7247
7248 if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
7249 {
7250 bHasElse = GL_TRUE;
7251 }
7252
7253 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
7254 {
7255 return GL_FALSE;
7256 }
7257 }
7258 break;
7259
7260 case OPCODE_ELSE :
7261 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
7262 return GL_FALSE;
7263 break;
7264
7265 case OPCODE_ENDIF:
7266 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
7267 return GL_FALSE;
7268 break;
7269
7270 case OPCODE_BGNLOOP:
7271 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
7272 {
7273 return GL_FALSE;
7274 }
7275 break;
7276
7277 case OPCODE_BRK:
7278 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
7279 {
7280 return GL_FALSE;
7281 }
7282 break;
7283
7284 case OPCODE_CONT:
7285 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
7286 {
7287 return GL_FALSE;
7288 }
7289 break;
7290
7291 case OPCODE_ENDLOOP:
7292 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
7293 {
7294 return GL_FALSE;
7295 }
7296 break;
7297
7298 case OPCODE_BGNSUB:
7299 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
7300 {
7301 return GL_FALSE;
7302 }
7303 break;
7304
7305 case OPCODE_RET:
7306 if( GL_FALSE == assemble_RET(pR700AsmCode) )
7307 {
7308 return GL_FALSE;
7309 }
7310 break;
7311
7312 case OPCODE_CAL:
7313 if( GL_FALSE == assemble_CAL(pR700AsmCode,
7314 pILInst[i].BranchTarget,
7315 uiIL_Shift,
7316 uiNumberInsts,
7317 pILInst,
7318 NULL) )
7319 {
7320 return GL_FALSE;
7321 }
7322 break;
7323
7324 //case OPCODE_EXPORT:
7325 // if ( GL_FALSE == assemble_EXPORT() )
7326 // return GL_FALSE;
7327 // break;
7328
7329 case OPCODE_ENDSUB:
7330 return assemble_ENDSUB(pR700AsmCode);
7331
7332 case OPCODE_END:
7333 //pR700AsmCode->uiCurInst = i;
7334 //This is to remaind that if in later exoort there is depth/stencil
7335 //export, we need a mov to re-arrange DST channel, where using a
7336 //psuedo inst, we will use this end inst to do it.
7337 return GL_TRUE;
7338
7339 default:
7340 radeon_error("r600: unknown instruction %d\n", pILInst[i].Opcode);
7341 return GL_FALSE;
7342 }
7343 }
7344
7345 return GL_TRUE;
7346 }
7347
7348 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
7349 {
7350 #ifndef GENERATE_SHADER_FOR_2D
7351 setRetInLoopFlag(pAsm, SQ_SEL_0);
7352 #endif
7353
7354 if((SPT_FP == pAsm->currentShaderType) && (8 == pAsm->unAsic))
7355 {
7356 EG_add_ps_interp(pAsm);
7357 }
7358
7359 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7360 return GL_TRUE;
7361 }
7362
7363 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
7364 {
7365 GLuint i;
7366 GLuint unCFoffset;
7367 TypedShaderList * plstCFmain;
7368 TypedShaderList * plstCFsub;
7369
7370 R700ShaderInstruction * pInst;
7371 R700ControlFlowGenericClause * pCFInst;
7372
7373 R700ControlFlowALUClause * pCF_ALU;
7374 R700ALUInstruction * pALU;
7375 GLuint unConstOffset = 0;
7376 GLuint unRegOffset;
7377 GLuint unMinRegIndex;
7378
7379 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
7380
7381 #ifndef GENERATE_SHADER_FOR_2D
7382 /* remove flags init if they are not used */
7383 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
7384 {
7385 R700ControlFlowALUClause * pCF_ALU;
7386 pInst = plstCFmain->pHead;
7387 while(pInst)
7388 {
7389 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7390 {
7391 pCF_ALU = (R700ControlFlowALUClause *)pInst;
7392 if(0 == pCF_ALU->m_Word1.f.count)
7393 {
7394 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
7395 }
7396 else
7397 {
7398 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
7399
7400 pALU->m_pLinkedALUClause = NULL;
7401 pALU = (R700ALUInstruction *)(pALU->pNextInst);
7402 pALU->m_pLinkedALUClause = pCF_ALU;
7403 pCF_ALU->m_pLinkedALUInstruction = pALU;
7404
7405 pCF_ALU->m_Word1.f.count--;
7406 }
7407 break;
7408 }
7409 pInst = pInst->pNextInst;
7410 };
7411 }
7412 #endif /* GENERATE_SHADER_FOR_2D */
7413
7414 if(pAsm->CALLSTACK[0].max > 0)
7415 {
7416 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
7417 }
7418
7419 if(0 == pAsm->unSubArrayPointer)
7420 {
7421 return GL_TRUE;
7422 }
7423
7424 unCFoffset = plstCFmain->uNumOfNode;
7425
7426 if(NULL != pILProg->Parameters)
7427 {
7428 unConstOffset = pILProg->Parameters->NumParameters;
7429 }
7430
7431 /* Reloc subs */
7432 for(i=0; i<pAsm->unSubArrayPointer; i++)
7433 {
7434 pAsm->subs[i].unCFoffset = unCFoffset;
7435 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
7436
7437 pInst = plstCFsub->pHead;
7438
7439 /* reloc instructions */
7440 while(pInst)
7441 {
7442 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
7443 {
7444 pCFInst = (R700ControlFlowGenericClause *)pInst;
7445
7446 switch (pCFInst->m_Word1.f.cf_inst)
7447 {
7448 case SQ_CF_INST_POP:
7449 case SQ_CF_INST_JUMP:
7450 case SQ_CF_INST_ELSE:
7451 case SQ_CF_INST_LOOP_END:
7452 case SQ_CF_INST_LOOP_START:
7453 case SQ_CF_INST_LOOP_START_NO_AL:
7454 case SQ_CF_INST_LOOP_CONTINUE:
7455 case SQ_CF_INST_LOOP_BREAK:
7456 pCFInst->m_Word0.f.addr += unCFoffset;
7457 break;
7458 default:
7459 break;
7460 }
7461 }
7462
7463 pInst->m_uIndex += unCFoffset;
7464
7465 pInst = pInst->pNextInst;
7466 };
7467
7468 if(NULL != pAsm->subs[i].pPresubDesc)
7469 {
7470 GLuint uNumSrc;
7471
7472 unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
7473 unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
7474 unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
7475
7476 pInst = plstCFsub->pHead;
7477 while(pInst)
7478 {
7479 if(SIT_CF_ALU == pInst->m_ShaderInstType)
7480 {
7481 pCF_ALU = (R700ControlFlowALUClause *)pInst;
7482
7483 pALU = pCF_ALU->m_pLinkedALUInstruction;
7484 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7485 {
7486 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7487
7488 if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
7489 {
7490 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7491 }
7492 else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
7493 {
7494 pALU->m_Word0.f.src0_sel += unConstOffset;
7495 }
7496
7497 if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
7498 >= SQ_OP3_INST_MUL_LIT )
7499 { /* op3 : 3 srcs */
7500 if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
7501 {
7502 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
7503 }
7504 else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
7505 {
7506 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
7507 }
7508 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7509 {
7510 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7511 }
7512 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7513 {
7514 pALU->m_Word0.f.src1_sel += unConstOffset;
7515 }
7516 }
7517 else
7518 {
7519 if(8 == pAsm->unAsic)
7520 {
7521 uNumSrc = EG_GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7522 }
7523 else
7524 {
7525 if(pAsm->bR6xx)
7526 {
7527 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
7528 }
7529 else
7530 {
7531 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
7532 }
7533 }
7534 if(2 == uNumSrc)
7535 { /* 2 srcs */
7536 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
7537 {
7538 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
7539 }
7540 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
7541 {
7542 pALU->m_Word0.f.src1_sel += unConstOffset;
7543 }
7544 }
7545 }
7546 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7547 }
7548 }
7549 pInst = pInst->pNextInst;
7550 };
7551 }
7552
7553 /* Put sub into main */
7554 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
7555 plstCFmain->pTail = plstCFsub->pTail;
7556 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
7557
7558 unCFoffset += plstCFsub->uNumOfNode;
7559 }
7560
7561 /* reloc callers */
7562 for(i=0; i<pAsm->unCallerArrayPointer; i++)
7563 {
7564 pAsm->callers[i].cf_ptr->m_Word0.f.addr
7565 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
7566
7567 if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
7568 {
7569 unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
7570 unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
7571
7572 if(NULL != pAsm->callers[i].prelude_cf_ptr)
7573 {
7574 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
7575 pALU = pCF_ALU->m_pLinkedALUInstruction;
7576 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7577 {
7578 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
7579 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7580 }
7581 }
7582 if(NULL != pAsm->callers[i].finale_cf_ptr)
7583 {
7584 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
7585 pALU = pCF_ALU->m_pLinkedALUInstruction;
7586 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
7587 {
7588 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
7589 pALU = (R700ALUInstruction*)(pALU->pNextInst);
7590 }
7591 }
7592 }
7593 }
7594
7595 return GL_TRUE;
7596 }
7597
7598 GLboolean callPreSub(r700_AssemblerBase* pAsm,
7599 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
7600 COMPILED_SUB * pCompiledSub,
7601 GLshort uOutReg,
7602 GLshort uNumValidSrc)
7603 {
7604 /* save assemble context */
7605 GLuint starting_temp_register_number_save;
7606 GLuint number_used_registers_save;
7607 GLuint uFirstHelpReg_save;
7608 GLuint uHelpReg_save;
7609 GLuint uiCurInst_save;
7610 struct prog_instruction *pILInst_save;
7611 PRESUB_DESC * pPresubDesc;
7612 GLboolean bRet;
7613 int i;
7614
7615 R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
7616
7617 /* copy srcs to presub inputs */
7618 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7619 for(i=0; i<uNumValidSrc; i++)
7620 {
7621 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7622 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
7623 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
7624 pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
7625 pAsm->D.dst.writex = 1;
7626 pAsm->D.dst.writey = 1;
7627 pAsm->D.dst.writez = 1;
7628 pAsm->D.dst.writew = 1;
7629
7630 if( GL_FALSE == assemble_src(pAsm, i, 0) )
7631 {
7632 return GL_FALSE;
7633 }
7634
7635 next_ins(pAsm);
7636 }
7637 if(uNumValidSrc > 0)
7638 {
7639 prelude_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7640 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7641 }
7642
7643 /* browse thro existing presubs. */
7644 for(i=0; i<pAsm->unNumPresub; i++)
7645 {
7646 if(pAsm->presubs[i].sptSigniture == scriptSigniture)
7647 {
7648 break;
7649 }
7650 }
7651
7652 if(i == pAsm->unNumPresub)
7653 { /* not loaded yet */
7654 /* save assemble context */
7655 number_used_registers_save = pAsm->number_used_registers;
7656 uFirstHelpReg_save = pAsm->uFirstHelpReg;
7657 uHelpReg_save = pAsm->uHelpReg;
7658 starting_temp_register_number_save = pAsm->starting_temp_register_number;
7659 pILInst_save = pAsm->pILInst;
7660 uiCurInst_save = pAsm->uiCurInst;
7661
7662 /* alloc in presub */
7663 if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
7664 {
7665 pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
7666 sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
7667 sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
7668 if(NULL == pAsm->presubs)
7669 {
7670 radeon_error("No memeory to allocate built in shader function description structures. \n");
7671 return GL_FALSE;
7672 }
7673 pAsm->unPresubArraySize += 4;
7674 }
7675
7676 pPresubDesc = &(pAsm->presubs[i]);
7677 pPresubDesc->sptSigniture = scriptSigniture;
7678
7679 /* constants offsets need to be final resolved at reloc. */
7680 if(0 == pAsm->unNumPresub)
7681 {
7682 pPresubDesc->unConstantsStart = 0;
7683 }
7684 else
7685 {
7686 pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
7687 + pAsm->presubs[i-1].pCompiledSub->NumParameters;
7688 }
7689
7690 pPresubDesc->pCompiledSub = pCompiledSub;
7691
7692 pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
7693 pPresubDesc->maxStartReg = uFirstHelpReg_save;
7694 pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
7695
7696 pAsm->unNumPresub++;
7697
7698 /* setup new assemble context */
7699 pAsm->starting_temp_register_number = 0;
7700 pAsm->number_used_registers = pCompiledSub->NumTemporaries;
7701 pAsm->uFirstHelpReg = pAsm->number_used_registers;
7702 pAsm->uHelpReg = pAsm->uFirstHelpReg;
7703
7704 bRet = assemble_CAL(pAsm,
7705 0,
7706 pPresubDesc->subIL_Shift,
7707 pCompiledSub->NumInstructions,
7708 pCompiledSub->Instructions,
7709 pPresubDesc);
7710
7711
7712 pPresubDesc->number_used_registers = pAsm->number_used_registers;
7713
7714 /* restore assemble context */
7715 pAsm->number_used_registers = number_used_registers_save;
7716 pAsm->uFirstHelpReg = uFirstHelpReg_save;
7717 pAsm->uHelpReg = uHelpReg_save;
7718 pAsm->starting_temp_register_number = starting_temp_register_number_save;
7719 pAsm->pILInst = pILInst_save;
7720 pAsm->uiCurInst = uiCurInst_save;
7721 }
7722 else
7723 { /* was loaded */
7724 pPresubDesc = &(pAsm->presubs[i]);
7725
7726 bRet = assemble_CAL(pAsm,
7727 0,
7728 pPresubDesc->subIL_Shift,
7729 pCompiledSub->NumInstructions,
7730 pCompiledSub->Instructions,
7731 pPresubDesc);
7732 }
7733
7734 if(GL_FALSE == bRet)
7735 {
7736 radeon_error("Shader presub assemble failed. \n");
7737 }
7738 else
7739 {
7740 /* copy presub output to real dst */
7741 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7742 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
7743
7744 if( GL_FALSE == assemble_dst(pAsm) )
7745 {
7746 return GL_FALSE;
7747 }
7748
7749 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
7750 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
7751 pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
7752 pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
7753 pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
7754 pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
7755 pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
7756
7757 next_ins(pAsm);
7758
7759 pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = (R700ControlFlowGenericClause*) pAsm->cf_current_alu_clause_ptr;
7760 pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
7761 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
7762 }
7763
7764 if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
7765 {
7766 pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
7767 }
7768 if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
7769 {
7770 pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
7771 }
7772
7773 return bRet;
7774 }
7775
7776 GLboolean Process_Export(r700_AssemblerBase* pAsm,
7777 GLuint type,
7778 GLuint export_starting_index,
7779 GLuint export_count,
7780 GLuint starting_register_number,
7781 GLboolean is_depth_export)
7782 {
7783 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
7784 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
7785
7786 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
7787
7788 switch (type)
7789 {
7790 case SQ_EXPORT_PIXEL:
7791 if(GL_TRUE == is_depth_export)
7792 {
7793 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
7794 }
7795 else
7796 {
7797 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
7798 }
7799 break;
7800
7801 case SQ_EXPORT_POS:
7802 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
7803 break;
7804
7805 case SQ_EXPORT_PARAM:
7806 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
7807 break;
7808
7809 default:
7810 radeon_error("Unknown export type: %d\n", type);
7811 return GL_FALSE;
7812 break;
7813 }
7814
7815 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
7816
7817 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
7818 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
7819 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
7820
7821 if(8 == pAsm->unAsic)
7822 {
7823 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7824 (export_count - 1),
7825 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_shift,
7826 EG_CF_ALLOC_EXPORT_WORD1__BURST_COUNT_mask);
7827 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7828 0,
7829 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7830 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7831 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7832 0,
7833 EG_CF_ALLOC_EXPORT_WORD1__VPM_shift,
7834 EG_CF_ALLOC_EXPORT_WORD1__VPM_bit);
7835 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7836 EG_CF_INST_EXPORT,
7837 EG_CF_WORD1__CF_INST_shift,
7838 EG_CF_WORD1__CF_INST_mask);
7839 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7840 0,
7841 EG_CF_ALLOC_EXPORT_WORD1__MARK_shift,
7842 EG_CF_ALLOC_EXPORT_WORD1__MARK_bit);
7843 SETfield(pAsm->cf_current_export_clause_ptr->m_Word1.val,
7844 1,
7845 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_shift,
7846 EG_CF_ALLOC_EXPORT_WORD1__BARRIER_bit);
7847 }
7848 else
7849 {
7850 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
7851 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
7852 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
7853 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
7854 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
7855 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
7856 }
7857
7858 if (export_count == 1)
7859 {
7860 assert(starting_register_number >= pAsm->starting_export_register_number);
7861
7862 /* exports Z as a float into Red channel */
7863 if (GL_TRUE == is_depth_export)
7864 {
7865 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_Z;
7866 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
7867 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
7868 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
7869 }
7870 else
7871 {
7872 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7873 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7874 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7875 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7876 }
7877 }
7878 else
7879 {
7880 // This should only be used if all components for all registers have been written
7881 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
7882 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
7883 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
7884 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
7885 }
7886
7887 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
7888
7889 return GL_TRUE;
7890 }
7891
7892 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
7893 GLbitfield OutputsWritten)
7894 {
7895 unsigned int unBit;
7896 GLuint export_count = 0;
7897 unsigned int i;
7898
7899 for (i = 0; i < FRAG_RESULT_MAX; ++i)
7900 {
7901 unBit = 1 << i;
7902
7903 if (OutputsWritten & unBit)
7904 {
7905 GLboolean is_depth = i == FRAG_RESULT_DEPTH ? GL_TRUE : GL_FALSE;
7906 if (!Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->uiFP_OutputMap[i], is_depth))
7907 return GL_FALSE;
7908 ++export_count;
7909 }
7910 }
7911
7912 /* Need to export something, otherwise we'll hang
7913 * results are undefined anyway */
7914 if(export_count == 0)
7915 {
7916 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, pR700AsmCode->starting_export_register_number, GL_FALSE);
7917 }
7918
7919 if(pR700AsmCode->cf_last_export_ptr != NULL)
7920 {
7921 if(8 == pR700AsmCode->unAsic)
7922 {
7923 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7924 1,
7925 EG_CF_ALLOC_EXPORT_WORD1__EOP_shift,
7926 EG_CF_ALLOC_EXPORT_WORD1__EOP_bit);
7927 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7928 EG_CF_INST_EXPORT_DONE,
7929 EG_CF_WORD1__CF_INST_shift,
7930 EG_CF_WORD1__CF_INST_mask);
7931 }
7932 else
7933 {
7934 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7935 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
7936 }
7937 }
7938
7939 return GL_TRUE;
7940 }
7941
7942 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
7943 GLbitfield OutputsWritten)
7944 {
7945 unsigned int unBit;
7946 unsigned int i;
7947
7948 GLuint export_starting_index = 0;
7949 GLuint export_count = pR700AsmCode->number_of_exports;
7950
7951 unBit = 1 << VERT_RESULT_HPOS;
7952 if(OutputsWritten & unBit)
7953 {
7954 if( GL_FALSE == Process_Export(pR700AsmCode,
7955 SQ_EXPORT_POS,
7956 export_starting_index,
7957 1,
7958 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
7959 GL_FALSE) )
7960 {
7961 return GL_FALSE;
7962 }
7963 export_starting_index++;
7964 export_count--;
7965 }
7966
7967 unBit = 1 << VERT_RESULT_PSIZ;
7968 if(OutputsWritten & unBit)
7969 {
7970 if( GL_FALSE == Process_Export(pR700AsmCode,
7971 SQ_EXPORT_POS,
7972 export_starting_index,
7973 1,
7974 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_PSIZ],
7975 GL_FALSE) )
7976 {
7977 return GL_FALSE;
7978 }
7979 export_count--;
7980 }
7981
7982 if(8 == pR700AsmCode->unAsic)
7983 {
7984 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
7985 EG_CF_INST_EXPORT_DONE,
7986 EG_CF_WORD1__CF_INST_shift,
7987 EG_CF_WORD1__CF_INST_mask);
7988 }
7989 else
7990 {
7991 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
7992 }
7993
7994
7995 pR700AsmCode->number_of_exports = export_count;
7996 export_starting_index = 0;
7997
7998 unBit = 1 << VERT_RESULT_COL0;
7999 if(OutputsWritten & unBit)
8000 {
8001 if( GL_FALSE == Process_Export(pR700AsmCode,
8002 SQ_EXPORT_PARAM,
8003 export_starting_index,
8004 1,
8005 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
8006 GL_FALSE) )
8007 {
8008 return GL_FALSE;
8009 }
8010
8011 export_starting_index++;
8012 }
8013
8014 unBit = 1 << VERT_RESULT_COL1;
8015 if(OutputsWritten & unBit)
8016 {
8017 if( GL_FALSE == Process_Export(pR700AsmCode,
8018 SQ_EXPORT_PARAM,
8019 export_starting_index,
8020 1,
8021 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
8022 GL_FALSE) )
8023 {
8024 return GL_FALSE;
8025 }
8026
8027 export_starting_index++;
8028 }
8029
8030 unBit = 1 << VERT_RESULT_FOGC;
8031 if(OutputsWritten & unBit)
8032 {
8033 if( GL_FALSE == Process_Export(pR700AsmCode,
8034 SQ_EXPORT_PARAM,
8035 export_starting_index,
8036 1,
8037 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
8038 GL_FALSE) )
8039 {
8040 return GL_FALSE;
8041 }
8042
8043 export_starting_index++;
8044 }
8045
8046 for(i=0; i<8; i++)
8047 {
8048 unBit = 1 << (VERT_RESULT_TEX0 + i);
8049 if(OutputsWritten & unBit)
8050 {
8051 if( GL_FALSE == Process_Export(pR700AsmCode,
8052 SQ_EXPORT_PARAM,
8053 export_starting_index,
8054 1,
8055 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
8056 GL_FALSE) )
8057 {
8058 return GL_FALSE;
8059 }
8060
8061 export_starting_index++;
8062 }
8063 }
8064
8065 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
8066 {
8067 unBit = 1 << i;
8068 if(OutputsWritten & unBit)
8069 {
8070 if( GL_FALSE == Process_Export(pR700AsmCode,
8071 SQ_EXPORT_PARAM,
8072 export_starting_index,
8073 1,
8074 pR700AsmCode->ucVP_OutputMap[i],
8075 GL_FALSE) )
8076 {
8077 return GL_FALSE;
8078 }
8079
8080 export_starting_index++;
8081 }
8082 }
8083
8084 // At least one param should be exported
8085 if (export_count)
8086 {
8087 if(8 == pR700AsmCode->unAsic)
8088 {
8089 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
8090 EG_CF_INST_EXPORT_DONE,
8091 EG_CF_WORD1__CF_INST_shift,
8092 EG_CF_WORD1__CF_INST_mask);
8093 }
8094 else
8095 {
8096 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
8097 }
8098 }
8099 else
8100 {
8101 if( GL_FALSE == Process_Export(pR700AsmCode,
8102 SQ_EXPORT_PARAM,
8103 0,
8104 1,
8105 pR700AsmCode->starting_export_register_number,
8106 GL_FALSE) )
8107 {
8108 return GL_FALSE;
8109 }
8110
8111 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
8112 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
8113 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
8114 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
8115 if(8 == pR700AsmCode->unAsic)
8116 {
8117 SETfield(pR700AsmCode->cf_last_export_ptr->m_Word1.val,
8118 EG_CF_INST_EXPORT_DONE,
8119 EG_CF_WORD1__CF_INST_shift,
8120 EG_CF_WORD1__CF_INST_mask);
8121 }
8122 else
8123 {
8124 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
8125 }
8126 }
8127
8128 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
8129
8130 return GL_TRUE;
8131 }
8132
8133 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
8134 {
8135 if(NULL != pR700AsmCode->pInstDeps)
8136 {
8137 FREE(pR700AsmCode->pInstDeps);
8138 pR700AsmCode->pInstDeps = NULL;
8139 }
8140
8141 if(NULL != pR700AsmCode->subs)
8142 {
8143 FREE(pR700AsmCode->subs);
8144 pR700AsmCode->subs = NULL;
8145 }
8146 if(NULL != pR700AsmCode->callers)
8147 {
8148 FREE(pR700AsmCode->callers);
8149 pR700AsmCode->callers = NULL;
8150 }
8151
8152 if(NULL != pR700AsmCode->presubs)
8153 {
8154 FREE(pR700AsmCode->presubs);
8155 pR700AsmCode->presubs = NULL;
8156 }
8157
8158 return GL_TRUE;
8159 }
8160