/* Snapshot of src/mesa/drivers/dri/r600/r700_assembler.c from mesa.git
 * (commit: Merge branch 'glsl-pp-rework-2'). */
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36
37 #include "radeon_debug.h"
38 #include "r600_context.h"
39
40 #include "r700_assembler.h"
41
42 #define USE_CF_FOR_CONTINUE_BREAK 1
43 #define USE_CF_FOR_POP_AFTER 1
44
45 struct prog_instruction noise1_insts[12] = {
46 {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
47 {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
52 {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
53 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
58 };
/*
 * Constant table referenced by noise1_presub.  Only the first row carries
 * data; the second row is zero (it was implicitly zero-initialized before,
 * it is simply written out here).
 */
float noise1_const[2][4] = {
    {0.3f, 0.9f, 0.5f, 0.3f},
    {0.0f, 0.0f, 0.0f, 0.0f}
};
62
63 COMPILED_SUB noise1_presub = {
64 &(noise1_insts[0]),
65 12,
66 2,
67 1,
68 0,
69 &(noise1_const[0]),
70 SWIZZLE_X,
71 SWIZZLE_X,
72 SWIZZLE_X,
73 SWIZZLE_X,
74 {0,0,0},
75 0
76 };
77
78 BITS addrmode_PVSDST(PVSDST * pPVSDST)
79 {
80 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
81 }
82
83 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
84 {
85 pPVSDST->addrmode0 = addrmode & 1;
86 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
87 }
88
89 void nomask_PVSDST(PVSDST * pPVSDST)
90 {
91 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
92 }
93
94 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
95 {
96 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
97 }
98
99 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
100 {
101 pPVSSRC->addrmode0 = addrmode & 1;
102 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
103 }
104
105
106 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
107 {
108 pPVSSRC->swizzlex =
109 pPVSSRC->swizzley =
110 pPVSSRC->swizzlez =
111 pPVSSRC->swizzlew = swz;
112 }
113
114 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
115 {
116 pPVSSRC->swizzlex = SQ_SEL_X;
117 pPVSSRC->swizzley = SQ_SEL_Y;
118 pPVSSRC->swizzlez = SQ_SEL_Z;
119 pPVSSRC->swizzlew = SQ_SEL_W;
120 }
121
122 void
123 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
124 {
125 switch (x)
126 {
127 case SQ_SEL_X: x = pPVSSRC->swizzlex;
128 break;
129 case SQ_SEL_Y: x = pPVSSRC->swizzley;
130 break;
131 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
132 break;
133 case SQ_SEL_W: x = pPVSSRC->swizzlew;
134 break;
135 default:;
136 }
137
138 switch (y)
139 {
140 case SQ_SEL_X: y = pPVSSRC->swizzlex;
141 break;
142 case SQ_SEL_Y: y = pPVSSRC->swizzley;
143 break;
144 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
145 break;
146 case SQ_SEL_W: y = pPVSSRC->swizzlew;
147 break;
148 default:;
149 }
150
151 switch (z)
152 {
153 case SQ_SEL_X: z = pPVSSRC->swizzlex;
154 break;
155 case SQ_SEL_Y: z = pPVSSRC->swizzley;
156 break;
157 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
158 break;
159 case SQ_SEL_W: z = pPVSSRC->swizzlew;
160 break;
161 default:;
162 }
163
164 switch (w)
165 {
166 case SQ_SEL_X: w = pPVSSRC->swizzlex;
167 break;
168 case SQ_SEL_Y: w = pPVSSRC->swizzley;
169 break;
170 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
171 break;
172 case SQ_SEL_W: w = pPVSSRC->swizzlew;
173 break;
174 default:;
175 }
176
177 pPVSSRC->swizzlex = x;
178 pPVSSRC->swizzley = y;
179 pPVSSRC->swizzlez = z;
180 pPVSSRC->swizzlew = w;
181 }
182
183 void neg_PVSSRC(PVSSRC* pPVSSRC)
184 {
185 pPVSSRC->negx = 1;
186 pPVSSRC->negy = 1;
187 pPVSSRC->negz = 1;
188 pPVSSRC->negw = 1;
189 }
190
191 void noneg_PVSSRC(PVSSRC* pPVSSRC)
192 {
193 pPVSSRC->negx = 0;
194 pPVSSRC->negy = 0;
195 pPVSSRC->negz = 0;
196 pPVSSRC->negw = 0;
197 }
198
199 // negate argument (for SUB instead of ADD and alike)
200 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
201 {
202 pPVSSRC->negx = !pPVSSRC->negx;
203 pPVSSRC->negy = !pPVSSRC->negy;
204 pPVSSRC->negz = !pPVSSRC->negz;
205 pPVSSRC->negw = !pPVSSRC->negw;
206 }
207
208 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
209 {
210 switch (c)
211 {
212 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
213 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
214 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
215 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
216 default:;
217 }
218 }
219
220 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
221 {
222 switch (c)
223 {
224 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
225 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
226 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
227 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
228 default:;
229 }
230 }
231
232 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
233 {
234 return (pOutVTXFmt0->point_size |
235 pOutVTXFmt0->edge_flag |
236 pOutVTXFmt0->rta_index |
237 pOutVTXFmt0->kill_flag |
238 pOutVTXFmt0->viewport_index);
239 }
240
241 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
242 {
243 return (pFPOutFmt->depth |
244 pFPOutFmt->stencil_ref |
245 pFPOutFmt->mask |
246 pFPOutFmt->coverage_to_mask);
247 }
248
249 GLboolean is_reduction_opcode(PVSDWORD* dest)
250 {
251 if (dest->dst.op3 == 0)
252 {
253 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
254 {
255 return GL_TRUE;
256 }
257 }
258 return GL_FALSE;
259 }
260
261 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
262 {
263 GLuint format = FMT_INVALID;
264 GLuint uiElemSize = 0;
265
266 switch (eType)
267 {
268 case GL_BYTE:
269 case GL_UNSIGNED_BYTE:
270 uiElemSize = 1;
271 switch(nChannels)
272 {
273 case 1:
274 format = FMT_8; break;
275 case 2:
276 format = FMT_8_8; break;
277 case 3:
278 format = FMT_8_8_8; break;
279 case 4:
280 format = FMT_8_8_8_8; break;
281 default:
282 break;
283 }
284 break;
285
286 case GL_UNSIGNED_SHORT:
287 case GL_SHORT:
288 uiElemSize = 2;
289 switch(nChannels)
290 {
291 case 1:
292 format = FMT_16; break;
293 case 2:
294 format = FMT_16_16; break;
295 case 3:
296 format = FMT_16_16_16; break;
297 case 4:
298 format = FMT_16_16_16_16; break;
299 default:
300 break;
301 }
302 break;
303
304 case GL_UNSIGNED_INT:
305 case GL_INT:
306 uiElemSize = 4;
307 switch(nChannels)
308 {
309 case 1:
310 format = FMT_32; break;
311 case 2:
312 format = FMT_32_32; break;
313 case 3:
314 format = FMT_32_32_32; break;
315 case 4:
316 format = FMT_32_32_32_32; break;
317 default:
318 break;
319 }
320 break;
321
322 case GL_FLOAT:
323 uiElemSize = 4;
324 switch(nChannels)
325 {
326 case 1:
327 format = FMT_32_FLOAT; break;
328 case 2:
329 format = FMT_32_32_FLOAT; break;
330 case 3:
331 format = FMT_32_32_32_FLOAT; break;
332 case 4:
333 format = FMT_32_32_32_32_FLOAT; break;
334 default:
335 break;
336 }
337 break;
338 case GL_DOUBLE:
339 uiElemSize = 8;
340 switch(nChannels)
341 {
342 case 1:
343 format = FMT_32_FLOAT; break;
344 case 2:
345 format = FMT_32_32_FLOAT; break;
346 case 3:
347 format = FMT_32_32_32_FLOAT; break;
348 case 4:
349 format = FMT_32_32_32_32_FLOAT; break;
350 default:
351 break;
352 }
353 break;
354 default:
355 ;
356 //GL_ASSERT_NO_CASE();
357 }
358
359 if(NULL != pClient_size)
360 {
361 *pClient_size = uiElemSize * nChannels;
362 }
363
364 return(format);
365 }
366
367 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
368 {
369 if(nIsOp3 > 0)
370 {
371 return 3;
372 }
373
374 switch (opcode)
375 {
376 case SQ_OP2_INST_ADD:
377 case SQ_OP2_INST_KILLE:
378 case SQ_OP2_INST_KILLGT:
379 case SQ_OP2_INST_KILLGE:
380 case SQ_OP2_INST_KILLNE:
381 case SQ_OP2_INST_MUL:
382 case SQ_OP2_INST_MAX:
383 case SQ_OP2_INST_MIN:
384 //case SQ_OP2_INST_MAX_DX10:
385 //case SQ_OP2_INST_MIN_DX10:
386 case SQ_OP2_INST_SETE:
387 case SQ_OP2_INST_SETNE:
388 case SQ_OP2_INST_SETGT:
389 case SQ_OP2_INST_SETGE:
390 case SQ_OP2_INST_PRED_SETE:
391 case SQ_OP2_INST_PRED_SETGT:
392 case SQ_OP2_INST_PRED_SETGE:
393 case SQ_OP2_INST_PRED_SETNE:
394 case SQ_OP2_INST_DOT4:
395 case SQ_OP2_INST_DOT4_IEEE:
396 case SQ_OP2_INST_CUBE:
397 return 2;
398
399 case SQ_OP2_INST_MOV:
400 case SQ_OP2_INST_MOVA_FLOOR:
401 case SQ_OP2_INST_FRACT:
402 case SQ_OP2_INST_FLOOR:
403 case SQ_OP2_INST_TRUNC:
404 case SQ_OP2_INST_EXP_IEEE:
405 case SQ_OP2_INST_LOG_CLAMPED:
406 case SQ_OP2_INST_LOG_IEEE:
407 case SQ_OP2_INST_RECIP_IEEE:
408 case SQ_OP2_INST_RECIPSQRT_IEEE:
409 case SQ_OP2_INST_FLT_TO_INT:
410 case SQ_OP2_INST_SIN:
411 case SQ_OP2_INST_COS:
412 return 1;
413
414 default: radeon_error(
415 "Need instruction operand number for %x.\n", opcode);
416 };
417
418 return 3;
419 }
420
421 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
422 {
423 GLuint i;
424
425 Init_R700_Shader(pShader);
426 pAsm->pR700Shader = pShader;
427 pAsm->currentShaderType = spt;
428
429 pAsm->cf_last_export_ptr = NULL;
430
431 pAsm->cf_current_export_clause_ptr = NULL;
432 pAsm->cf_current_alu_clause_ptr = NULL;
433 pAsm->cf_current_tex_clause_ptr = NULL;
434 pAsm->cf_current_vtx_clause_ptr = NULL;
435 pAsm->cf_current_cf_clause_ptr = NULL;
436
437 // No clause has been created yet
438 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
439
440 pAsm->number_of_colorandz_exports = 0;
441 pAsm->number_of_exports = 0;
442 pAsm->number_of_export_opcodes = 0;
443
444 pAsm->alu_x_opcode = 0;
445
446 pAsm->D2.bits = 0;
447
448 pAsm->D.bits = 0;
449 pAsm->S[0].bits = 0;
450 pAsm->S[1].bits = 0;
451 pAsm->S[2].bits = 0;
452
453 pAsm->uLastPosUpdate = 0;
454
455 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
456
457 pAsm->uIIns = 0;
458 pAsm->uOIns = 0;
459 pAsm->number_used_registers = 0;
460 pAsm->uUsedConsts = 256;
461
462
463 // Fragment programs
464 pAsm->uBoolConsts = 0;
465 pAsm->uIntConsts = 0;
466 pAsm->uInsts = 0;
467 pAsm->uConsts = 0;
468
469 pAsm->FCSP = 0;
470 pAsm->fc_stack[0].type = FC_NONE;
471
472 pAsm->aArgSubst[0] =
473 pAsm->aArgSubst[1] =
474 pAsm->aArgSubst[2] =
475 pAsm->aArgSubst[3] = (-1);
476
477 pAsm->uOutputs = 0;
478
479 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
480 {
481 pAsm->color_export_register_number[i] = (-1);
482 }
483
484
485 pAsm->depth_export_register_number = (-1);
486 pAsm->stencil_export_register_number = (-1);
487 pAsm->coverage_to_mask_export_register_number = (-1);
488 pAsm->mask_export_register_number = (-1);
489
490 pAsm->starting_export_register_number = 0;
491 pAsm->starting_vfetch_register_number = 0;
492 pAsm->starting_temp_register_number = 0;
493 pAsm->uFirstHelpReg = 0;
494
495 pAsm->input_position_is_used = GL_FALSE;
496 pAsm->input_normal_is_used = GL_FALSE;
497
498 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
499 {
500 pAsm->input_color_is_used[ i ] = GL_FALSE;
501 }
502
503 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
504 {
505 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
506 }
507
508 for (i=0; i<VERT_ATTRIB_MAX; i++)
509 {
510 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
511 }
512
513 pAsm->number_of_inputs = 0;
514
515 pAsm->is_tex = GL_FALSE;
516 pAsm->need_tex_barrier = GL_FALSE;
517
518 pAsm->subs = NULL;
519 pAsm->unSubArraySize = 0;
520 pAsm->unSubArrayPointer = 0;
521 pAsm->callers = NULL;
522 pAsm->unCallerArraySize = 0;
523 pAsm->unCallerArrayPointer = 0;
524
525 pAsm->CALLSP = 0;
526 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
527 pAsm->CALLSTACK[0].plstCFInstructions_local
528 = &(pAsm->pR700Shader->lstCFInstructions);
529
530 pAsm->CALLSTACK[0].max = 0;
531 pAsm->CALLSTACK[0].current = 0;
532
533 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
534
535 pAsm->unCFflags = 0;
536
537 pAsm->presubs = NULL;
538 pAsm->unPresubArraySize = 0;
539 pAsm->unNumPresub = 0;
540 pAsm->unCurNumILInsts = 0;
541
542 pAsm->unVetTexBits = 0;
543
544 return 0;
545 }
546
547 GLboolean IsTex(gl_inst_opcode Opcode)
548 {
549 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
550 (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
551 {
552 return GL_TRUE;
553 }
554 return GL_FALSE;
555 }
556
557 GLboolean IsAlu(gl_inst_opcode Opcode)
558 {
559 //TODO : more for fc and ex for higher spec.
560 if( IsTex(Opcode) )
561 {
562 return GL_FALSE;
563 }
564 return GL_TRUE;
565 }
566
567 int check_current_clause(r700_AssemblerBase* pAsm,
568 CF_CLAUSE_TYPE new_clause_type)
569 {
570 if (pAsm->cf_current_clause_type != new_clause_type)
571 { //Close last open clause
572 switch (pAsm->cf_current_clause_type)
573 {
574 case CF_ALU_CLAUSE:
575 if ( pAsm->cf_current_alu_clause_ptr != NULL)
576 {
577 pAsm->cf_current_alu_clause_ptr = NULL;
578 }
579 break;
580 case CF_VTX_CLAUSE:
581 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
582 {
583 pAsm->cf_current_vtx_clause_ptr = NULL;
584 }
585 break;
586 case CF_TEX_CLAUSE:
587 if ( pAsm->cf_current_tex_clause_ptr != NULL)
588 {
589 pAsm->cf_current_tex_clause_ptr = NULL;
590 }
591 break;
592 case CF_EXPORT_CLAUSE:
593 if ( pAsm->cf_current_export_clause_ptr != NULL)
594 {
595 pAsm->cf_current_export_clause_ptr = NULL;
596 }
597 break;
598 case CF_OTHER_CLAUSE:
599 if ( pAsm->cf_current_cf_clause_ptr != NULL)
600 {
601 pAsm->cf_current_cf_clause_ptr = NULL;
602 }
603 break;
604 case CF_EMPTY_CLAUSE:
605 break;
606 default:
607 radeon_error(
608 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
609 return GL_FALSE;
610 }
611
612 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
613
614 // Create new clause
615 switch (new_clause_type)
616 {
617 case CF_ALU_CLAUSE:
618 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
619 break;
620 case CF_VTX_CLAUSE:
621 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
622 break;
623 case CF_TEX_CLAUSE:
624 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
625 break;
626 case CF_EXPORT_CLAUSE:
627 {
628 R700ControlFlowSXClause* pR700ControlFlowSXClause
629 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
630
631 // Add new export instruction to control flow program
632 if (pR700ControlFlowSXClause != 0)
633 {
634 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
635 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
636 AddCFInstruction( pAsm->pR700Shader,
637 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
638 }
639 else
640 {
641 radeon_error(
642 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
643 return GL_FALSE;
644 }
645 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
646 }
647 break;
648 case CF_EMPTY_CLAUSE:
649 break;
650 case CF_OTHER_CLAUSE:
651 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
652 break;
653 default:
654 radeon_error(
655 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
656 return GL_FALSE;
657 }
658 }
659
660 return GL_TRUE;
661 }
662
663 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
664 {
665 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
666 {
667 return GL_FALSE;
668 }
669
670 pAsm->cf_current_cf_clause_ptr =
671 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
672
673 if (pAsm->cf_current_cf_clause_ptr != NULL)
674 {
675 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
676 AddCFInstruction( pAsm->pR700Shader,
677 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
678 }
679 else
680 {
681 radeon_error("Could not allocate a new VFetch CF instruction.\n");
682 return GL_FALSE;
683 }
684
685 return GL_TRUE;
686 }
687
688 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
689 R700VertexInstruction* vertex_instruction_ptr)
690 {
691 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
692 {
693 return GL_FALSE;
694 }
695
696 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
697 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
698 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
699 ) )
700 {
701 // Create new Vfetch control flow instruction for this new clause
702 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
703
704 if (pAsm->cf_current_vtx_clause_ptr != NULL)
705 {
706 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
707 AddCFInstruction( pAsm->pR700Shader,
708 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
709 }
710 else
711 {
712 radeon_error("Could not allocate a new VFetch CF instruction.\n");
713 return GL_FALSE;
714 }
715
716 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
717 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
718 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
719 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
720 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
721 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
722 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
723 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
724 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
725
726 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
727 }
728 else
729 {
730 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
731 }
732
733 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
734
735 return GL_TRUE;
736 }
737
738 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
739 R700TextureInstruction* tex_instruction_ptr)
740 {
741 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
742 {
743 return GL_FALSE;
744 }
745
746 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
747 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
748 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
749 ) )
750 {
751 // new tex cf instruction for this new clause
752 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
753
754 if (pAsm->cf_current_tex_clause_ptr != NULL)
755 {
756 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
757 AddCFInstruction( pAsm->pR700Shader,
758 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
759 }
760 else
761 {
762 radeon_error("Could not allocate a new TEX CF instruction.\n");
763 return GL_FALSE;
764 }
765
766 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
767 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
768 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
769
770 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
771 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
772 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
773 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
774 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
775 }
776 else
777 {
778 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
779 }
780
781 // If this clause constains any TEX instruction that is dependent on a previous instruction,
782 // set the barrier bit
783 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
784 {
785 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
786 }
787
788 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
789 {
790 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
791 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
792 }
793
794 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
795
796 return GL_TRUE;
797 }
798
799 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
800 GLuint gl_client_id,
801 GLuint destination_register,
802 GLuint number_of_elements,
803 GLenum dataElementType,
804 VTX_FETCH_METHOD* pFetchMethod)
805 {
806 GLuint client_size_inbyte;
807 GLuint data_format;
808 GLuint mega_fetch_count;
809 GLuint is_mega_fetch_flag;
810
811 R700VertexGenericFetch* vfetch_instruction_ptr;
812 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
813
814 if (assembled_vfetch_instruction_ptr == NULL)
815 {
816 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
817 if (vfetch_instruction_ptr == NULL)
818 {
819 return GL_FALSE;
820 }
821 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
822 }
823 else
824 {
825 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
826 }
827
828 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
829
830 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
831 {
832 //TODO : mini fetch
833 }
834 else
835 {
836 mega_fetch_count = MEGA_FETCH_BYTES - 1;
837 is_mega_fetch_flag = 0x1;
838 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
839 }
840
841 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
842 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
843 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
844
845 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
846 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
847 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
848 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
849 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
850
851 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
852 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
853 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
854 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
855
856 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
857
858 // Destination register
859 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
860 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
861
862 vfetch_instruction_ptr->m_Word2.f.offset = 0;
863 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
864
865 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
866
867 if (assembled_vfetch_instruction_ptr == NULL)
868 {
869 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
870 {
871 return GL_FALSE;
872 }
873
874 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
875 {
876 return GL_FALSE;
877 }
878 else
879 {
880 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
881 }
882 }
883
884 return GL_TRUE;
885 }
886
887 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
888 GLuint destination_register,
889 GLenum type,
890 GLint size,
891 GLubyte element,
892 GLuint _signed,
893 GLboolean normalize,
894 VTX_FETCH_METHOD * pFetchMethod)
895 {
896 GLuint client_size_inbyte;
897 GLuint data_format;
898 GLuint mega_fetch_count;
899 GLuint is_mega_fetch_flag;
900
901 R700VertexGenericFetch* vfetch_instruction_ptr;
902 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
903 = pAsm->vfetch_instruction_ptr_array[element];
904
905 if (assembled_vfetch_instruction_ptr == NULL)
906 {
907 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
908 if (vfetch_instruction_ptr == NULL)
909 {
910 return GL_FALSE;
911 }
912 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
913 }
914 else
915 {
916 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
917 }
918
919 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
920
921 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
922 {
923 //TODO : mini fetch
924 }
925 else
926 {
927 mega_fetch_count = MEGA_FETCH_BYTES - 1;
928 is_mega_fetch_flag = 0x1;
929 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
930 }
931
932 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
933 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
934 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
935
936 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
937 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
938 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
939 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
940 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
941
942 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
943 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
944 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
945 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
946
947 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
948 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
949 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
950
951 if(1 == _signed)
952 {
953 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
954 }
955 else
956 {
957 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
958 }
959
960 if(GL_TRUE == normalize)
961 {
962 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
963 }
964 else
965 {
966 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
967 }
968
969 // Destination register
970 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
971 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
972
973 vfetch_instruction_ptr->m_Word2.f.offset = 0;
974 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
975
976 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
977
978 if (assembled_vfetch_instruction_ptr == NULL)
979 {
980 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
981 {
982 return GL_FALSE;
983 }
984
985 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
986 {
987 return GL_FALSE;
988 }
989 else
990 {
991 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
992 }
993 }
994
995 return GL_TRUE;
996 }
997
998 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
999 {
1000 GLint i;
1001 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
1002 pAsm->cf_current_vtx_clause_ptr = NULL;
1003
1004 for (i=0; i<VERT_ATTRIB_MAX; i++)
1005 {
1006 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1007 }
1008
1009 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
1010
1011 return GL_TRUE;
1012 }
1013
1014 GLuint gethelpr(r700_AssemblerBase* pAsm)
1015 {
1016 GLuint r = pAsm->uHelpReg;
1017 pAsm->uHelpReg++;
1018 if (pAsm->uHelpReg > pAsm->number_used_registers)
1019 {
1020 pAsm->number_used_registers = pAsm->uHelpReg;
1021 }
1022 return r;
1023 }
1024 void resethelpr(r700_AssemblerBase* pAsm)
1025 {
1026 pAsm->uHelpReg = pAsm->uFirstHelpReg;
1027 }
1028
1029 void checkop_init(r700_AssemblerBase* pAsm)
1030 {
1031 resethelpr(pAsm);
1032 pAsm->aArgSubst[0] =
1033 pAsm->aArgSubst[1] =
1034 pAsm->aArgSubst[2] =
1035 pAsm->aArgSubst[3] = -1;
1036 }
1037
1038 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1039 {
1040 GLuint tmp = gethelpr(pAsm);
1041
1042 //mov src to temp helper gpr.
1043 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1044
1045 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1046
1047 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1048 pAsm->D.dst.reg = tmp;
1049
1050 nomask_PVSDST(&(pAsm->D.dst));
1051
1052 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1053 {
1054 return GL_FALSE;
1055 }
1056
1057 noswizzle_PVSSRC(&(pAsm->S[0].src));
1058 noneg_PVSSRC(&(pAsm->S[0].src));
1059
1060 if( GL_FALSE == next_ins(pAsm) )
1061 {
1062 return GL_FALSE;
1063 }
1064
1065 pAsm->aArgSubst[1 + src] = tmp;
1066
1067 return GL_TRUE;
1068 }
1069
1070 GLboolean checkop1(r700_AssemblerBase* pAsm)
1071 {
1072 checkop_init(pAsm);
1073 return GL_TRUE;
1074 }
1075
1076 GLboolean checkop2(r700_AssemblerBase* pAsm)
1077 {
1078 GLboolean bSrcConst[2];
1079 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1080
1081 checkop_init(pAsm);
1082
1083 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1084 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1085 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1086 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1087 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1088 {
1089 bSrcConst[0] = GL_TRUE;
1090 }
1091 else
1092 {
1093 bSrcConst[0] = GL_FALSE;
1094 }
1095 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1096 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1097 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1098 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1099 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1100 {
1101 bSrcConst[1] = GL_TRUE;
1102 }
1103 else
1104 {
1105 bSrcConst[1] = GL_FALSE;
1106 }
1107
1108 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1109 {
1110 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1111 {
1112 if( GL_FALSE == mov_temp(pAsm, 1) )
1113 {
1114 return GL_FALSE;
1115 }
1116 }
1117 }
1118
1119 return GL_TRUE;
1120 }
1121
1122 GLboolean checkop3(r700_AssemblerBase* pAsm)
1123 {
1124 GLboolean bSrcConst[3];
1125 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1126
1127 checkop_init(pAsm);
1128
1129 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1130 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1131 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1132 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1133 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1134 {
1135 bSrcConst[0] = GL_TRUE;
1136 }
1137 else
1138 {
1139 bSrcConst[0] = GL_FALSE;
1140 }
1141 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1142 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1143 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1144 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1145 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1146 {
1147 bSrcConst[1] = GL_TRUE;
1148 }
1149 else
1150 {
1151 bSrcConst[1] = GL_FALSE;
1152 }
1153 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1154 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1155 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1156 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1157 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1158 {
1159 bSrcConst[2] = GL_TRUE;
1160 }
1161 else
1162 {
1163 bSrcConst[2] = GL_FALSE;
1164 }
1165
1166 if( (GL_TRUE == bSrcConst[0]) &&
1167 (GL_TRUE == bSrcConst[1]) &&
1168 (GL_TRUE == bSrcConst[2]) )
1169 {
1170 if( GL_FALSE == mov_temp(pAsm, 1) )
1171 {
1172 return GL_FALSE;
1173 }
1174 if( GL_FALSE == mov_temp(pAsm, 2) )
1175 {
1176 return GL_FALSE;
1177 }
1178
1179 return GL_TRUE;
1180 }
1181 else if( (GL_TRUE == bSrcConst[0]) &&
1182 (GL_TRUE == bSrcConst[1]) )
1183 {
1184 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1185 {
1186 if( GL_FALSE == mov_temp(pAsm, 1) )
1187 {
1188 return 1;
1189 }
1190 }
1191
1192 return GL_TRUE;
1193 }
1194 else if ( (GL_TRUE == bSrcConst[0]) &&
1195 (GL_TRUE == bSrcConst[2]) )
1196 {
1197 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1198 {
1199 if( GL_FALSE == mov_temp(pAsm, 2) )
1200 {
1201 return GL_FALSE;
1202 }
1203 }
1204
1205 return GL_TRUE;
1206 }
1207 else if( (GL_TRUE == bSrcConst[1]) &&
1208 (GL_TRUE == bSrcConst[2]) )
1209 {
1210 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1211 {
1212 if( GL_FALSE == mov_temp(pAsm, 2) )
1213 {
1214 return GL_FALSE;
1215 }
1216 }
1217
1218 return GL_TRUE;
1219 }
1220
1221 return GL_TRUE;
1222 }
1223
1224 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1225 int src,
1226 int fld)
1227 {
1228 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1229
1230 if (fld == -1)
1231 {
1232 fld = src;
1233 }
1234
1235 if(pAsm->aArgSubst[1+src] >= 0)
1236 {
1237 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1238 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1239 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1240 }
1241 else
1242 {
1243 switch (pILInst->SrcReg[src].File)
1244 {
1245 case PROGRAM_TEMPORARY:
1246 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1247 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1248 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1249 break;
1250 case PROGRAM_CONSTANT:
1251 case PROGRAM_LOCAL_PARAM:
1252 case PROGRAM_ENV_PARAM:
1253 case PROGRAM_STATE_VAR:
1254 case PROGRAM_UNIFORM:
1255 if (1 == pILInst->SrcReg[src].RelAddr)
1256 {
1257 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1258 }
1259 else
1260 {
1261 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1262 }
1263
1264 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1265 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1266 break;
1267 case PROGRAM_INPUT:
1268 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1269 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1270 switch (pAsm->currentShaderType)
1271 {
1272 case SPT_FP:
1273 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1274 break;
1275 case SPT_VP:
1276 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1277 break;
1278 }
1279 break;
1280 default:
1281 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1282 return GL_FALSE;
1283 }
1284 }
1285
1286 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1287 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1288 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1289 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1290
1291 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1292 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1293 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1294 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1295
1296 return GL_TRUE;
1297 }
1298
1299 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1300 {
1301 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1302 switch (pILInst->DstReg.File)
1303 {
1304 case PROGRAM_TEMPORARY:
1305 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1306 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1307 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1308 break;
1309 case PROGRAM_ADDRESS:
1310 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1311 pAsm->D.dst.rtype = DST_REG_A0;
1312 pAsm->D.dst.reg = 0;
1313 break;
1314 case PROGRAM_OUTPUT:
1315 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1316 pAsm->D.dst.rtype = DST_REG_OUT;
1317 switch (pAsm->currentShaderType)
1318 {
1319 case SPT_FP:
1320 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1321 break;
1322 case SPT_VP:
1323 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1324 break;
1325 }
1326 break;
1327 default:
1328 radeon_error("Invalid destination output argument type\n");
1329 return GL_FALSE;
1330 }
1331
1332 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1333 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1334 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1335 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1336
1337 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1338 {
1339 pAsm->D2.dst2.SaturateMode = 1;
1340 }
1341 else
1342 {
1343 pAsm->D2.dst2.SaturateMode = 0;
1344 }
1345
1346 return GL_TRUE;
1347 }
1348
1349 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1350 {
1351 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1352
1353 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1354 {
1355 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1356 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1357
1358 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1359 }
1360 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1361 {
1362 pAsm->D.dst.rtype = DST_REG_OUT;
1363 switch (pAsm->currentShaderType)
1364 {
1365 case SPT_FP:
1366 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1367 break;
1368 case SPT_VP:
1369 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1370 break;
1371 }
1372
1373 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1374 }
1375 else
1376 {
1377 radeon_error("Invalid destination output argument type\n");
1378 return GL_FALSE;
1379 }
1380
1381 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1382 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1383 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1384 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1385
1386 return GL_TRUE;
1387 }
1388
1389 GLboolean tex_src(r700_AssemblerBase *pAsm)
1390 {
1391 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1392
1393 GLboolean bValidTexCoord = GL_FALSE;
1394
1395 if(pAsm->aArgSubst[1] >= 0)
1396 {
1397 bValidTexCoord = GL_TRUE;
1398 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1399 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1400 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1401 }
1402 else
1403 {
1404 switch (pILInst->SrcReg[0].File) {
1405 case PROGRAM_UNIFORM:
1406 case PROGRAM_CONSTANT:
1407 case PROGRAM_LOCAL_PARAM:
1408 case PROGRAM_ENV_PARAM:
1409 case PROGRAM_STATE_VAR:
1410 break;
1411 case PROGRAM_TEMPORARY:
1412 bValidTexCoord = GL_TRUE;
1413 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1414 pAsm->starting_temp_register_number;
1415 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1416 break;
1417 case PROGRAM_INPUT:
1418 if(SPT_VP == pAsm->currentShaderType)
1419 {
1420 switch (pILInst->SrcReg[0].Index)
1421 {
1422 case VERT_ATTRIB_TEX0:
1423 case VERT_ATTRIB_TEX1:
1424 case VERT_ATTRIB_TEX2:
1425 case VERT_ATTRIB_TEX3:
1426 case VERT_ATTRIB_TEX4:
1427 case VERT_ATTRIB_TEX5:
1428 case VERT_ATTRIB_TEX6:
1429 case VERT_ATTRIB_TEX7:
1430 bValidTexCoord = GL_TRUE;
1431 pAsm->S[0].src.reg =
1432 pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1433 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1434 break;
1435 }
1436 }
1437 else
1438 {
1439 switch (pILInst->SrcReg[0].Index)
1440 {
1441 case FRAG_ATTRIB_WPOS:
1442 case FRAG_ATTRIB_COL0:
1443 case FRAG_ATTRIB_COL1:
1444 case FRAG_ATTRIB_FOGC:
1445 case FRAG_ATTRIB_TEX0:
1446 case FRAG_ATTRIB_TEX1:
1447 case FRAG_ATTRIB_TEX2:
1448 case FRAG_ATTRIB_TEX3:
1449 case FRAG_ATTRIB_TEX4:
1450 case FRAG_ATTRIB_TEX5:
1451 case FRAG_ATTRIB_TEX6:
1452 case FRAG_ATTRIB_TEX7:
1453 bValidTexCoord = GL_TRUE;
1454 pAsm->S[0].src.reg =
1455 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1456 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1457 break;
1458 case FRAG_ATTRIB_FACE:
1459 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1460 break;
1461 case FRAG_ATTRIB_PNTC:
1462 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1463 break;
1464 }
1465
1466 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1467 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1468 {
1469 bValidTexCoord = GL_TRUE;
1470 pAsm->S[0].src.reg =
1471 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1472 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1473 }
1474 }
1475
1476 break;
1477 }
1478 }
1479
1480 if(GL_TRUE == bValidTexCoord)
1481 {
1482 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1483 }
1484 else
1485 {
1486 radeon_error("Invalid source texcoord for TEX instruction\n");
1487 return GL_FALSE;
1488 }
1489
1490 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1491 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1492 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1493 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1494
1495 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1496 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1497 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1498 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1499
1500 return GL_TRUE;
1501 }
1502
1503 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1504 {
1505 PVSSRC * texture_coordinate_source;
1506 PVSSRC * texture_unit_source;
1507
1508 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1509 if (tex_instruction_ptr == NULL)
1510 {
1511 return GL_FALSE;
1512 }
1513 Init_R700TextureInstruction(tex_instruction_ptr);
1514
1515 texture_coordinate_source = &(pAsm->S[0].src);
1516 texture_unit_source = &(pAsm->S[1].src);
1517
1518 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1519 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1520 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1521 tex_instruction_ptr->m_Word0.f.alt_const = 0;
1522
1523 if(SPT_VP == pAsm->currentShaderType)
1524 {
1525 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
1526 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1527 }
1528 else
1529 {
1530 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1531 }
1532
1533 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1534 if (normalized) {
1535 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1536 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1537 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1538 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1539 } else {
1540 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1541 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1542 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1543 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1544 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1545 }
1546
1547 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1548 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1549 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1550 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1551
1552 // dst
1553 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1554 (pAsm->D.dst.rtype == DST_REG_OUT) )
1555 {
1556 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1557 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1558
1559 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1560 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1561
1562 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1563 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1564 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1565 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1566
1567
1568 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1569 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1570 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1571 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1572 }
1573 else
1574 {
1575 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1576 return GL_FALSE;
1577 }
1578
1579 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1580 {
1581 return GL_FALSE;
1582 }
1583
1584 return GL_TRUE;
1585 }
1586
1587 void initialize(r700_AssemblerBase *pAsm)
1588 {
1589 GLuint cycle, component;
1590
1591 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1592 {
1593 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1594 {
1595 pAsm->hw_gpr[cycle][component] = (-1);
1596 }
1597 }
1598 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1599 {
1600 pAsm->hw_cfile_addr[component] = (-1);
1601 pAsm->hw_cfile_chan[component] = (-1);
1602 }
1603 }
1604
1605 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1606 int source_index,
1607 PVSSRC* pSource,
1608 BITS scalar_channel_index)
1609 {
1610 BITS src_sel;
1611 BITS src_rel;
1612 BITS src_chan;
1613 BITS src_neg;
1614
1615 //--------------------------------------------------------------------------
1616 // Source for operands src0, src1.
1617 // Values [0,127] correspond to GPR[0..127].
1618 // Values [256,511] correspond to cfile constants c[0..255].
1619
1620 //--------------------------------------------------------------------------
1621 // Other special values are shown in the list below.
1622
1623 // 248 SQ_ALU_SRC_0: special constant 0.0.
1624 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1625
1626 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1627 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1628
1629 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1630 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1631
1632 // 254 SQ_ALU_SRC_PV: previous vector result.
1633 // 255 SQ_ALU_SRC_PS: previous scalar result.
1634 //--------------------------------------------------------------------------
1635
1636 BITS channel_swizzle;
1637 switch (scalar_channel_index)
1638 {
1639 case 0: channel_swizzle = pSource->swizzlex; break;
1640 case 1: channel_swizzle = pSource->swizzley; break;
1641 case 2: channel_swizzle = pSource->swizzlez; break;
1642 case 3: channel_swizzle = pSource->swizzlew; break;
1643 default: channel_swizzle = SQ_SEL_MASK; break;
1644 }
1645
1646 if(channel_swizzle == SQ_SEL_0)
1647 {
1648 src_sel = SQ_ALU_SRC_0;
1649 }
1650 else if (channel_swizzle == SQ_SEL_1)
1651 {
1652 src_sel = SQ_ALU_SRC_1;
1653 }
1654 else
1655 {
1656 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1657 (pSource->rtype == SRC_REG_INPUT)
1658 )
1659 {
1660 src_sel = pSource->reg;
1661 }
1662 else if (pSource->rtype == SRC_REG_CONSTANT)
1663 {
1664 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1665 }
1666 else if (pSource->rtype == SRC_REC_LITERAL)
1667 {
1668 src_sel = SQ_ALU_SRC_LITERAL;
1669 }
1670 else
1671 {
1672 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1673 source_index, pSource->rtype);
1674 return GL_FALSE;
1675 }
1676 }
1677
1678 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1679 {
1680 src_rel = SQ_ABSOLUTE;
1681 }
1682 else
1683 {
1684 src_rel = SQ_RELATIVE;
1685 }
1686
1687 switch (channel_swizzle)
1688 {
1689 case SQ_SEL_X:
1690 src_chan = SQ_CHAN_X;
1691 break;
1692 case SQ_SEL_Y:
1693 src_chan = SQ_CHAN_Y;
1694 break;
1695 case SQ_SEL_Z:
1696 src_chan = SQ_CHAN_Z;
1697 break;
1698 case SQ_SEL_W:
1699 src_chan = SQ_CHAN_W;
1700 break;
1701 case SQ_SEL_0:
1702 case SQ_SEL_1:
1703 // Does not matter since src_sel controls
1704 src_chan = SQ_CHAN_X;
1705 break;
1706 default:
1707 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1708 return GL_FALSE;
1709 break;
1710 }
1711
1712 switch (scalar_channel_index)
1713 {
1714 case 0: src_neg = pSource->negx; break;
1715 case 1: src_neg = pSource->negy; break;
1716 case 2: src_neg = pSource->negz; break;
1717 case 3: src_neg = pSource->negw; break;
1718 default: src_neg = 0; break;
1719 }
1720
1721 switch (source_index)
1722 {
1723 case 0:
1724 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1725 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1726 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1727 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1728 break;
1729 case 1:
1730 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1731 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1732 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1733 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1734 break;
1735 case 2:
1736 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1737 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1738 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1739 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1740 break;
1741 default:
1742 radeon_error("Only three sources allowed in ALU opcodes.\n");
1743 return GL_FALSE;
1744 break;
1745 }
1746
1747 return GL_TRUE;
1748 }
1749
1750 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1751 R700ALUInstruction* alu_instruction_ptr,
1752 GLuint contiguous_slots_needed)
1753 {
1754 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1755 {
1756 return GL_FALSE;
1757 }
1758
1759 if ( pAsm->alu_x_opcode != 0 ||
1760 pAsm->cf_current_alu_clause_ptr == NULL ||
1761 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1762 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1763 ) )
1764 {
1765
1766 //new cf inst for this clause
1767 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1768
1769 // link the new cf to cf segment
1770 if(NULL != pAsm->cf_current_alu_clause_ptr)
1771 {
1772 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1773 AddCFInstruction( pAsm->pR700Shader,
1774 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1775 }
1776 else
1777 {
1778 radeon_error("Could not allocate a new ALU CF instruction.\n");
1779 return GL_FALSE;
1780 }
1781
1782 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1783 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1784 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1785
1786 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1787 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1788 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1789
1790 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1791
1792 if(pAsm->alu_x_opcode != 0)
1793 {
1794 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1795 pAsm->alu_x_opcode = 0;
1796 }
1797 else
1798 {
1799 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1800 }
1801
1802 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1803
1804 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1805 }
1806 else
1807 {
1808 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
1809 }
1810
1811 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1812 // set the whole_quad_mode for this clause
1813 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1814 {
1815 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1816 }
1817
1818 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1819 {
1820 alu_instruction_ptr->m_Word0.f.last = 1;
1821 }
1822
1823 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1824 {
1825 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1826 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1827 }
1828
1829 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1830
1831 return GL_TRUE;
1832 }
1833
1834 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1835 int source_index,
1836 BITS* psrc_sel,
1837 BITS* psrc_rel,
1838 BITS* psrc_chan,
1839 BITS* psrc_neg)
1840 {
1841 switch (source_index)
1842 {
1843 case 0:
1844 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1845 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1846 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1847 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1848 break;
1849
1850 case 1:
1851 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1852 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1853 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1854 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1855 break;
1856
1857 case 2:
1858 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1859 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1860 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1861 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1862 break;
1863 }
1864 }
1865
1866 int is_cfile(BITS sel)
1867 {
1868 if (sel > 255 && sel < 512)
1869 {
1870 return 1;
1871 }
1872 return 0;
1873 }
1874
1875 int is_const(BITS sel)
1876 {
1877 if (is_cfile(sel))
1878 {
1879 return 1;
1880 }
1881 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1882 {
1883 return 1;
1884 }
1885 return 0;
1886 }
1887
1888 int is_gpr(BITS sel)
1889 {
1890 if (sel >= 0 && sel < 128)
1891 {
1892 return 1;
1893 }
1894 return 0;
1895 }
1896
1897 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1898 SQ_ALU_VEC_120, //001
1899 SQ_ALU_VEC_102, //010
1900
1901 SQ_ALU_VEC_201, //011
1902 SQ_ALU_VEC_012, //100
1903 SQ_ALU_VEC_021, //101
1904
1905 SQ_ALU_VEC_012, //110
1906 SQ_ALU_VEC_012}; //111
1907
1908 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1909 SQ_ALU_SCL_122, //001
1910 SQ_ALU_SCL_122, //010
1911
1912 SQ_ALU_SCL_221, //011
1913 SQ_ALU_SCL_212, //100
1914 SQ_ALU_SCL_122, //101
1915
1916 SQ_ALU_SCL_122, //110
1917 SQ_ALU_SCL_122}; //111
1918
1919 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1920 GLuint sel,
1921 GLuint chan)
1922 {
1923 int res_match = (-1);
1924 int res_empty = (-1);
1925
1926 GLint res;
1927
1928 for (res=3; res>=0; res--)
1929 {
1930 if(pAsm->hw_cfile_addr[ res] < 0)
1931 {
1932 res_empty = res;
1933 }
1934 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1935 &&
1936 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1937 {
1938 res_match = res;
1939 }
1940 }
1941
1942 if(res_match >= 0)
1943 {
1944 // Read for this scalar component already reserved, nothing to do here.
1945 ;
1946 }
1947 else if(res_empty >= 0)
1948 {
1949 pAsm->hw_cfile_addr[ res_empty ] = sel;
1950 pAsm->hw_cfile_chan[ res_empty ] = chan;
1951 }
1952 else
1953 {
1954 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1955 return GL_FALSE;
1956 }
1957 return GL_TRUE;
1958 }
1959
1960 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1961 {
1962 if(pAsm->hw_gpr[cycle][chan] < 0)
1963 {
1964 pAsm->hw_gpr[cycle][chan] = sel;
1965 }
1966 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1967 {
1968 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1969 return GL_FALSE;
1970 }
1971
1972 return GL_TRUE;
1973 }
1974
1975 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1976 {
1977 switch (swiz)
1978 {
1979 case SQ_ALU_SCL_210:
1980 {
1981 int table[3] = {2, 1, 0};
1982 *pCycle = table[sel];
1983 return GL_TRUE;
1984 }
1985 break;
1986 case SQ_ALU_SCL_122:
1987 {
1988 int table[3] = {1, 2, 2};
1989 *pCycle = table[sel];
1990 return GL_TRUE;
1991 }
1992 break;
1993 case SQ_ALU_SCL_212:
1994 {
1995 int table[3] = {2, 1, 2};
1996 *pCycle = table[sel];
1997 return GL_TRUE;
1998 }
1999 break;
2000 case SQ_ALU_SCL_221:
2001 {
2002 int table[3] = {2, 2, 1};
2003 *pCycle = table[sel];
2004 return GL_TRUE;
2005 }
2006 break;
2007 default:
2008 radeon_error("Bad Scalar bank swizzle value\n");
2009 break;
2010 }
2011
2012 return GL_FALSE;
2013 }
2014
2015 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2016 {
2017 switch (swiz)
2018 {
2019 case SQ_ALU_VEC_012:
2020 {
2021 int table[3] = {0, 1, 2};
2022 *pCycle = table[sel];
2023 }
2024 break;
2025 case SQ_ALU_VEC_021:
2026 {
2027 int table[3] = {0, 2, 1};
2028 *pCycle = table[sel];
2029 }
2030 break;
2031 case SQ_ALU_VEC_120:
2032 {
2033 int table[3] = {1, 2, 0};
2034 *pCycle = table[sel];
2035 }
2036 break;
2037 case SQ_ALU_VEC_102:
2038 {
2039 int table[3] = {1, 0, 2};
2040 *pCycle = table[sel];
2041 }
2042 break;
2043 case SQ_ALU_VEC_201:
2044 {
2045 int table[3] = {2, 0, 1};
2046 *pCycle = table[sel];
2047 }
2048 break;
2049 case SQ_ALU_VEC_210:
2050 {
2051 int table[3] = {2, 1, 0};
2052 *pCycle = table[sel];
2053 }
2054 break;
2055 default:
2056 radeon_error("Bad Vec bank swizzle value\n");
2057 return GL_FALSE;
2058 break;
2059 }
2060
2061 return GL_TRUE;
2062 }
2063
2064 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2065 R700ALUInstruction* alu_instruction_ptr)
2066 {
2067 GLuint cycle;
2068 GLuint bank_swizzle;
2069 GLuint const_count = 0;
2070
2071 BITS sel;
2072 BITS chan;
2073 BITS rel;
2074 BITS neg;
2075
2076 GLuint src;
2077
2078 BITS src_sel [3] = {0,0,0};
2079 BITS src_chan[3] = {0,0,0};
2080 BITS src_rel [3] = {0,0,0};
2081 BITS src_neg [3] = {0,0,0};
2082
2083 GLuint swizzle_key;
2084
2085 GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2086
2087 for (src=0; src<number_of_operands; src++)
2088 {
2089 get_src_properties(alu_instruction_ptr,
2090 src,
2091 &(src_sel[src]),
2092 &(src_rel[src]),
2093 &(src_chan[src]),
2094 &(src_neg[src]) );
2095 }
2096
2097
2098 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2099 (is_const( src_sel[1] ) ? 2 : 0) +
2100 (is_const( src_sel[2] ) ? 1 : 0) );
2101
2102 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2103
2104 for (src=0; src<number_of_operands; src++)
2105 {
2106 sel = src_sel [src];
2107 chan = src_chan[src];
2108 rel = src_rel [src];
2109 neg = src_neg [src];
2110
2111 if (is_const( sel ))
2112 {
2113 // Any constant, including literal and inline constants
2114 const_count++;
2115
2116 if (is_cfile( sel ))
2117 {
2118 reserve_cfile(pAsm, sel, chan);
2119 }
2120
2121 }
2122 }
2123
2124 for (src=0; src<number_of_operands; src++)
2125 {
2126 sel = src_sel [src];
2127 chan = src_chan[src];
2128 rel = src_rel [src];
2129 neg = src_neg [src];
2130
2131 if( is_gpr(sel) )
2132 {
2133 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2134
2135 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2136 {
2137 return GL_FALSE;
2138 }
2139
2140 if(cycle < const_count)
2141 {
2142 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2143 {
2144 return GL_FALSE;
2145 }
2146 }
2147 }
2148 }
2149
2150 return GL_TRUE;
2151 }
2152
2153 GLboolean check_vector(r700_AssemblerBase* pAsm,
2154 R700ALUInstruction* alu_instruction_ptr)
2155 {
2156 GLuint cycle;
2157 GLuint bank_swizzle;
2158 GLuint const_count = 0;
2159
2160 GLuint src;
2161
2162 BITS sel;
2163 BITS chan;
2164 BITS rel;
2165 BITS neg;
2166
2167 BITS src_sel [3] = {0,0,0};
2168 BITS src_chan[3] = {0,0,0};
2169 BITS src_rel [3] = {0,0,0};
2170 BITS src_neg [3] = {0,0,0};
2171
2172 GLuint swizzle_key;
2173
2174 GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2175
2176 for (src=0; src<number_of_operands; src++)
2177 {
2178 get_src_properties(alu_instruction_ptr,
2179 src,
2180 &(src_sel[src]),
2181 &(src_rel[src]),
2182 &(src_chan[src]),
2183 &(src_neg[src]) );
2184 }
2185
2186
2187 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2188 (is_const( src_sel[1] ) ? 2 : 0) +
2189 (is_const( src_sel[2] ) ? 1 : 0)
2190 );
2191
2192 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2193
2194 for (src=0; src<number_of_operands; src++)
2195 {
2196 sel = src_sel [src];
2197 chan = src_chan[src];
2198 rel = src_rel [src];
2199 neg = src_neg [src];
2200
2201
2202 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2203
2204 if( is_gpr(sel) )
2205 {
2206 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2207 {
2208 return GL_FALSE;
2209 }
2210
2211 if ( (src == 1) &&
2212 (sel == src_sel[0]) &&
2213 (chan == src_chan[0]) )
2214 {
2215 }
2216 else
2217 {
2218 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2219 {
2220 return GL_FALSE;
2221 }
2222 }
2223 }
2224 else if( is_const(sel) )
2225 {
2226 const_count++;
2227
2228 if( is_cfile(sel) )
2229 {
2230 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2231 {
2232 return GL_FALSE;
2233 }
2234 }
2235 }
2236 }
2237
2238 return GL_TRUE;
2239 }
2240
2241 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2242 {
2243 R700ALUInstruction * alu_instruction_ptr;
2244 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2245 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2246
2247 GLuint number_of_scalar_operations;
2248 GLboolean is_single_scalar_operation;
2249 GLuint scalar_channel_index;
2250
2251 PVSSRC * pcurrent_source;
2252 int current_source_index;
2253 GLuint contiguous_slots_needed;
2254
2255 GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2256 //GLuint channel_swizzle, j;
2257 //GLuint chan_counter[4] = {0, 0, 0, 0};
2258 //PVSSRC * pSource[3];
2259 GLboolean bSplitInst = GL_FALSE;
2260
2261 if (1 == pAsm->D.dst.math)
2262 {
2263 is_single_scalar_operation = GL_TRUE;
2264 number_of_scalar_operations = 1;
2265 }
2266 else
2267 {
2268 is_single_scalar_operation = GL_FALSE;
2269 number_of_scalar_operations = 4;
2270
2271 /* current assembler doesn't do more than 1 register per source */
2272 #if 0
2273 /* check read port, only very preliminary algorithm, not count in
2274 src0/1 same comp case and prev slot repeat case; also not count relative
2275 addressing. TODO: improve performance. */
2276 for(j=0; j<uNumSrc; j++)
2277 {
2278 pSource[j] = &(pAsm->S[j].src);
2279 }
2280 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2281 {
2282 for(j=0; j<uNumSrc; j++)
2283 {
2284 switch (scalar_channel_index)
2285 {
2286 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2287 case 1: channel_swizzle = pSource[j]->swizzley; break;
2288 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2289 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2290 default: channel_swizzle = SQ_SEL_MASK; break;
2291 }
2292 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2293 (pSource[j]->rtype == SRC_REG_INPUT))
2294 && (channel_swizzle <= SQ_SEL_W) )
2295 {
2296 chan_counter[channel_swizzle]++;
2297 }
2298 }
2299 }
2300 if( (chan_counter[SQ_SEL_X] > 3)
2301 || (chan_counter[SQ_SEL_Y] > 3)
2302 || (chan_counter[SQ_SEL_Z] > 3)
2303 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2304 {
2305 bSplitInst = GL_TRUE;
2306 }
2307 #endif
2308 }
2309
2310 contiguous_slots_needed = 0;
2311
2312 if(!is_single_scalar_operation)
2313 {
2314 contiguous_slots_needed = 4;
2315 }
2316
2317 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2318
2319 initialize(pAsm);
2320
2321 for (scalar_channel_index=0;
2322 scalar_channel_index < number_of_scalar_operations;
2323 scalar_channel_index++)
2324 {
2325 if(scalar_channel_index == (number_of_scalar_operations-1))
2326 {
2327 switch(pAsm->D2.dst2.literal_slots)
2328 {
2329 case 0:
2330 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2331 Init_R700ALUInstruction(alu_instruction_ptr);
2332 break;
2333 case 1:
2334 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2335 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2336 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2337 break;
2338 case 2:
2339 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2340 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2341 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2342 break;
2343 };
2344 }
2345 else
2346 {
2347 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2348 Init_R700ALUInstruction(alu_instruction_ptr);
2349 }
2350
2351 //src 0
2352 current_source_index = 0;
2353 pcurrent_source = &(pAsm->S[0].src);
2354
2355 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2356 current_source_index,
2357 pcurrent_source,
2358 scalar_channel_index) )
2359 {
2360 return GL_FALSE;
2361 }
2362
2363 if (uNumSrc > 1)
2364 {
2365 // Process source 1
2366 current_source_index = 1;
2367 pcurrent_source = &(pAsm->S[current_source_index].src);
2368
2369 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2370 current_source_index,
2371 pcurrent_source,
2372 scalar_channel_index) )
2373 {
2374 return GL_FALSE;
2375 }
2376 }
2377
2378 //other bits
2379 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
2380
2381 if( (is_single_scalar_operation == GL_TRUE)
2382 || (GL_TRUE == bSplitInst) )
2383 {
2384 alu_instruction_ptr->m_Word0.f.last = 1;
2385 }
2386 else
2387 {
2388 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2389 }
2390
2391 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2392 if(1 == pAsm->D.dst.predicated)
2393 {
2394 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2395 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2396 }
2397 else
2398 {
2399 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2400 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2401 }
2402
2403 // dst
2404 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2405 (pAsm->D.dst.rtype == DST_REG_OUT) )
2406 {
2407 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2408 }
2409 else
2410 {
2411 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2412 return GL_FALSE;
2413 }
2414
2415 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2416
2417 if ( is_single_scalar_operation == GL_TRUE )
2418 {
2419 // Override scalar_channel_index since only one scalar value will be written
2420 if(pAsm->D.dst.writex)
2421 {
2422 scalar_channel_index = 0;
2423 }
2424 else if(pAsm->D.dst.writey)
2425 {
2426 scalar_channel_index = 1;
2427 }
2428 else if(pAsm->D.dst.writez)
2429 {
2430 scalar_channel_index = 2;
2431 }
2432 else if(pAsm->D.dst.writew)
2433 {
2434 scalar_channel_index = 3;
2435 }
2436 }
2437
2438 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2439
2440 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2441
2442 if (pAsm->D.dst.op3)
2443 {
2444 //op3
2445
2446 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2447
2448 //There's 3rd src for op3
2449 current_source_index = 2;
2450 pcurrent_source = &(pAsm->S[current_source_index].src);
2451
2452 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2453 current_source_index,
2454 pcurrent_source,
2455 scalar_channel_index) )
2456 {
2457 return GL_FALSE;
2458 }
2459 }
2460 else
2461 {
2462 //op2
2463 if (pAsm->bR6xx)
2464 {
2465 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2466
2467 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
2468 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
2469
2470 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2471 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2472 switch (scalar_channel_index)
2473 {
2474 case 0:
2475 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2476 break;
2477 case 1:
2478 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2479 break;
2480 case 2:
2481 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2482 break;
2483 case 3:
2484 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2485 break;
2486 default:
2487 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2488 break;
2489 }
2490 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2491 }
2492 else
2493 {
2494 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2495
2496 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
2497 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
2498
2499 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2500 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2501 switch (scalar_channel_index)
2502 {
2503 case 0:
2504 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2505 break;
2506 case 1:
2507 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2508 break;
2509 case 2:
2510 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2511 break;
2512 case 3:
2513 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2514 break;
2515 default:
2516 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2517 break;
2518 }
2519 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2520 }
2521 }
2522
2523 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2524 {
2525 return GL_FALSE;
2526 }
2527
2528 /*
2529 * Judge the type of current instruction, is it vector or scalar
2530 * instruction.
2531 */
2532 if (is_single_scalar_operation)
2533 {
2534 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2535 {
2536 return GL_FALSE;
2537 }
2538 }
2539 else
2540 {
2541 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2542 {
2543 return GL_FALSE;
2544 }
2545 }
2546
2547 contiguous_slots_needed -= 1;
2548 }
2549
2550 return GL_TRUE;
2551 }
2552
2553 GLboolean next_ins(r700_AssemblerBase *pAsm)
2554 {
2555 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2556
2557 if( GL_TRUE == pAsm->is_tex )
2558 {
2559 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2560 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2561 {
2562 radeon_error("Error assembling TEX instruction\n");
2563 return GL_FALSE;
2564 }
2565 } else {
2566 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2567 {
2568 radeon_error("Error assembling TEX instruction\n");
2569 return GL_FALSE;
2570 }
2571 }
2572 }
2573 else
2574 { //ALU
2575 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2576 {
2577 radeon_error("Error assembling ALU instruction\n");
2578 return GL_FALSE;
2579 }
2580 }
2581
2582 if(pAsm->D.dst.rtype == DST_REG_OUT)
2583 {
2584 if(pAsm->D.dst.op3)
2585 {
2586 // There is no mask for OP3 instructions, so all channels are written
2587 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2588 }
2589 else
2590 {
2591 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2592 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2593 }
2594 }
2595
2596 //reset for next inst.
2597 pAsm->D.bits = 0;
2598 pAsm->D2.bits = 0;
2599 pAsm->S[0].bits = 0;
2600 pAsm->S[1].bits = 0;
2601 pAsm->S[2].bits = 0;
2602 pAsm->is_tex = GL_FALSE;
2603 pAsm->need_tex_barrier = GL_FALSE;
2604 pAsm->D2.bits = 0;
2605 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
2606 return GL_TRUE;
2607 }
2608
2609 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2610 {
2611 BITS tmp;
2612
2613 checkop1(pAsm);
2614
2615 tmp = gethelpr(pAsm);
2616
2617 // opcode tmp.x, a.x
2618 // MOV dst, tmp.x
2619
2620 pAsm->D.dst.opcode = opcode;
2621 pAsm->D.dst.math = 1;
2622
2623 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2624 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2625 pAsm->D.dst.reg = tmp;
2626 pAsm->D.dst.writex = 1;
2627
2628 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2629 {
2630 return GL_FALSE;
2631 }
2632
2633 if ( GL_FALSE == next_ins(pAsm) )
2634 {
2635 return GL_FALSE;
2636 }
2637
2638 // Now replicate result to all necessary channels in destination
2639 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2640
2641 if( GL_FALSE == assemble_dst(pAsm) )
2642 {
2643 return GL_FALSE;
2644 }
2645
2646 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2647 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2648 pAsm->S[0].src.reg = tmp;
2649
2650 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2651 noneg_PVSSRC(&(pAsm->S[0].src));
2652
2653 if( GL_FALSE == next_ins(pAsm) )
2654 {
2655 return GL_FALSE;
2656 }
2657
2658 return GL_TRUE;
2659 }
2660
2661 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2662 {
2663 checkop1(pAsm);
2664
2665 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2666
2667 if( GL_FALSE == assemble_dst(pAsm) )
2668 {
2669 return GL_FALSE;
2670 }
2671 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2672 {
2673 return GL_FALSE;
2674 }
2675
2676 pAsm->S[1].bits = pAsm->S[0].bits;
2677 flipneg_PVSSRC(&(pAsm->S[1].src));
2678
2679 if ( GL_FALSE == next_ins(pAsm) )
2680 {
2681 return GL_FALSE;
2682 }
2683
2684 return GL_TRUE;
2685 }
2686
2687 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2688 {
2689 if( GL_FALSE == checkop2(pAsm) )
2690 {
2691 return GL_FALSE;
2692 }
2693
2694 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2695
2696 if( GL_FALSE == assemble_dst(pAsm) )
2697 {
2698 return GL_FALSE;
2699 }
2700
2701 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2702 {
2703 return GL_FALSE;
2704 }
2705
2706 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2707 {
2708 return GL_FALSE;
2709 }
2710
2711 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2712 {
2713 flipneg_PVSSRC(&(pAsm->S[1].src));
2714 }
2715
2716 if( GL_FALSE == next_ins(pAsm) )
2717 {
2718 return GL_FALSE;
2719 }
2720
2721 return GL_TRUE;
2722 }
2723
2724 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
2725 { /* TODO: ar values dont' persist between clauses */
2726 if( GL_FALSE == checkop1(pAsm) )
2727 {
2728 return GL_FALSE;
2729 }
2730
2731 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
2732 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2733 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2734 pAsm->D.dst.reg = 0;
2735 pAsm->D.dst.writex = 0;
2736 pAsm->D.dst.writey = 0;
2737 pAsm->D.dst.writez = 0;
2738 pAsm->D.dst.writew = 0;
2739
2740 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2741 {
2742 return GL_FALSE;
2743 }
2744
2745 if( GL_FALSE == next_ins(pAsm) )
2746 {
2747 return GL_FALSE;
2748 }
2749
2750 return GL_TRUE;
2751 }
2752
2753 GLboolean assemble_BAD(char *opcode_str)
2754 {
2755 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2756 return GL_FALSE;
2757 }
2758
2759 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2760 {
2761 int tmp;
2762
2763 if( GL_FALSE == checkop3(pAsm) )
2764 {
2765 return GL_FALSE;
2766 }
2767
2768 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2769 pAsm->D.dst.op3 = 1;
2770
2771 tmp = (-1);
2772
2773 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2774 {
2775 //OP3 has no support for write mask
2776 tmp = gethelpr(pAsm);
2777
2778 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2779 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2780 pAsm->D.dst.reg = tmp;
2781
2782 nomask_PVSDST(&(pAsm->D.dst));
2783 }
2784 else
2785 {
2786 if( GL_FALSE == assemble_dst(pAsm) )
2787 {
2788 return GL_FALSE;
2789 }
2790 }
2791
2792 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2793 {
2794 return GL_FALSE;
2795 }
2796
2797 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2798 {
2799 return GL_FALSE;
2800 }
2801
2802 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2803 {
2804 return GL_FALSE;
2805 }
2806
2807 if ( GL_FALSE == next_ins(pAsm) )
2808 {
2809 return GL_FALSE;
2810 }
2811
2812 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2813 {
2814 if( GL_FALSE == assemble_dst(pAsm) )
2815 {
2816 return GL_FALSE;
2817 }
2818
2819 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2820
2821 //tmp for source
2822 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2823 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2824 pAsm->S[0].src.reg = tmp;
2825
2826 noneg_PVSSRC(&(pAsm->S[0].src));
2827 noswizzle_PVSSRC(&(pAsm->S[0].src));
2828
2829 if( GL_FALSE == next_ins(pAsm) )
2830 {
2831 return GL_FALSE;
2832 }
2833 }
2834
2835 return GL_TRUE;
2836 }
2837
2838 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
2839 {
2840 int tmp;
2841 checkop1(pAsm);
2842
2843 tmp = gethelpr(pAsm);
2844
2845 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2846 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2847 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2848 pAsm->D.dst.reg = tmp;
2849 pAsm->D.dst.writex = 1;
2850
2851 assemble_src(pAsm, 0, -1);
2852
2853 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
2854 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
2855 pAsm->D2.dst2.literal_slots = 1;
2856 pAsm->C[0].f = 1/(3.1415926535 * 2);
2857 pAsm->C[1].f = 0.0F;
2858 next_ins(pAsm);
2859
2860 pAsm->D.dst.opcode = opcode;
2861 pAsm->D.dst.math = 1;
2862
2863 assemble_dst(pAsm);
2864
2865 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2866 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2867 pAsm->S[0].src.reg = tmp;
2868 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2869 noneg_PVSSRC(&(pAsm->S[0].src));
2870
2871 next_ins(pAsm);
2872
2873 //TODO - replicate if more channels set in WriteMask
2874 return GL_TRUE;
2875
2876 }
2877
2878 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2879 {
2880 if( GL_FALSE == checkop2(pAsm) )
2881 {
2882 return GL_FALSE;
2883 }
2884
2885 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2886
2887 if( GL_FALSE == assemble_dst(pAsm) )
2888 {
2889 return GL_FALSE;
2890 }
2891
2892 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2893 {
2894 return GL_FALSE;
2895 }
2896
2897 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2898 {
2899 return GL_FALSE;
2900 }
2901
2902 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2903 {
2904 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2905 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2906 }
2907 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2908 {
2909 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2910 }
2911
2912 if ( GL_FALSE == next_ins(pAsm) )
2913 {
2914 return GL_FALSE;
2915 }
2916
2917 return GL_TRUE;
2918 }
2919
2920 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2921 {
2922 if( GL_FALSE == checkop2(pAsm) )
2923 {
2924 return GL_FALSE;
2925 }
2926
2927 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2928
2929 if( GL_FALSE == assemble_dst(pAsm) )
2930 {
2931 return GL_FALSE;
2932 }
2933
2934 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2935 {
2936 return GL_FALSE;
2937 }
2938
2939 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2940 {
2941 return GL_FALSE;
2942 }
2943
2944 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2945 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2946
2947 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2948 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2949
2950 if ( GL_FALSE == next_ins(pAsm) )
2951 {
2952 return GL_FALSE;
2953 }
2954
2955 return GL_TRUE;
2956 }
2957
2958 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2959 {
2960 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2961 }
2962
2963 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
2964 {
2965 BITS tmp;
2966
2967 checkop1(pAsm);
2968
2969 tmp = gethelpr(pAsm);
2970
2971 // FLOOR tmp.x, a.x
2972 // EX2 dst.x tmp.x
2973
2974 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
2975 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2976
2977 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2978 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2979 pAsm->D.dst.reg = tmp;
2980 pAsm->D.dst.writex = 1;
2981
2982 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2983 {
2984 return GL_FALSE;
2985 }
2986
2987 if( GL_FALSE == next_ins(pAsm) )
2988 {
2989 return GL_FALSE;
2990 }
2991
2992 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2993 pAsm->D.dst.math = 1;
2994
2995 if( GL_FALSE == assemble_dst(pAsm) )
2996 {
2997 return GL_FALSE;
2998 }
2999
3000 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3001
3002 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3003 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3004 pAsm->S[0].src.reg = tmp;
3005
3006 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3007 noneg_PVSSRC(&(pAsm->S[0].src));
3008
3009 if( GL_FALSE == next_ins(pAsm) )
3010 {
3011 return GL_FALSE;
3012 }
3013 }
3014
3015 // FRACT dst.y a.x
3016
3017 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3018 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3019
3020 if( GL_FALSE == assemble_dst(pAsm) )
3021 {
3022 return GL_FALSE;
3023 }
3024
3025 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3026 {
3027 return GL_FALSE;
3028 }
3029
3030 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3031
3032 if( GL_FALSE == next_ins(pAsm) )
3033 {
3034 return GL_FALSE;
3035 }
3036 }
3037
3038 // EX2 dst.z, a.x
3039
3040 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3041 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3042 pAsm->D.dst.math = 1;
3043
3044 if( GL_FALSE == assemble_dst(pAsm) )
3045 {
3046 return GL_FALSE;
3047 }
3048
3049 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3050 {
3051 return GL_FALSE;
3052 }
3053
3054 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3055
3056 if( GL_FALSE == next_ins(pAsm) )
3057 {
3058 return GL_FALSE;
3059 }
3060 }
3061
3062 // MOV dst.w 1.0
3063
3064 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3065 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3066
3067 if( GL_FALSE == assemble_dst(pAsm) )
3068 {
3069 return GL_FALSE;
3070 }
3071
3072 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3073
3074 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3075 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3076 pAsm->S[0].src.reg = tmp;
3077
3078 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3079 noneg_PVSSRC(&(pAsm->S[0].src));
3080
3081 if( GL_FALSE == next_ins(pAsm) )
3082 {
3083 return GL_FALSE;
3084 }
3085 }
3086
3087 return GL_TRUE;
3088 }
3089
3090 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3091 {
3092 checkop1(pAsm);
3093
3094 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3095
3096 if ( GL_FALSE == assemble_dst(pAsm) )
3097 {
3098 return GL_FALSE;
3099 }
3100
3101 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3102 {
3103 return GL_FALSE;
3104 }
3105
3106 if ( GL_FALSE == next_ins(pAsm) )
3107 {
3108 return GL_FALSE;
3109 }
3110
3111 return GL_TRUE;
3112 }
3113
3114 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3115 {
3116 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3117 }
3118
3119 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3120 {
3121 checkop1(pAsm);
3122
3123 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3124
3125 if ( GL_FALSE == assemble_dst(pAsm) )
3126 {
3127 return GL_FALSE;
3128 }
3129
3130 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3131 {
3132 return GL_FALSE;
3133 }
3134
3135 if ( GL_FALSE == next_ins(pAsm) )
3136 {
3137 return GL_FALSE;
3138 }
3139
3140 return GL_TRUE;
3141 }
3142
3143 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3144 {
3145 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3146
3147 if(pILInst->Opcode == OPCODE_KIL)
3148 checkop1(pAsm);
3149
3150 pAsm->D.dst.opcode = opcode;
3151 //pAsm->D.dst.math = 1;
3152
3153 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3154 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3155 pAsm->D.dst.reg = 0;
3156 pAsm->D.dst.writex = 0;
3157 pAsm->D.dst.writey = 0;
3158 pAsm->D.dst.writez = 0;
3159 pAsm->D.dst.writew = 0;
3160
3161 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3162 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3163 pAsm->S[0].src.reg = 0;
3164 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3165 noneg_PVSSRC(&(pAsm->S[0].src));
3166
3167 if(pILInst->Opcode == OPCODE_KIL_NV)
3168 {
3169 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3170 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3171 pAsm->S[1].src.reg = 0;
3172 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3173 neg_PVSSRC(&(pAsm->S[1].src));
3174 }
3175 else
3176 {
3177 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3178 {
3179 return GL_FALSE;
3180 }
3181
3182 }
3183
3184 if ( GL_FALSE == next_ins(pAsm) )
3185 {
3186 return GL_FALSE;
3187 }
3188
3189 /* Doc says KILL has to be last(end) ALU clause */
3190 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3191 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3192
3193 return GL_TRUE;
3194 }
3195
3196 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3197 {
3198 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3199 }
3200
3201 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3202 {
3203 BITS tmp;
3204
3205 if( GL_FALSE == checkop3(pAsm) )
3206 {
3207 return GL_FALSE;
3208 }
3209
3210 tmp = gethelpr(pAsm);
3211
3212 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3213
3214 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3215 pAsm->D.dst.reg = tmp;
3216 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3217 nomask_PVSDST(&(pAsm->D.dst));
3218
3219
3220 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3221 {
3222 return GL_FALSE;
3223 }
3224
3225 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3226 {
3227 return GL_FALSE;
3228 }
3229
3230 neg_PVSSRC(&(pAsm->S[1].src));
3231
3232 if( GL_FALSE == next_ins(pAsm) )
3233 {
3234 return GL_FALSE;
3235 }
3236
3237 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3238 pAsm->D.dst.op3 = 1;
3239
3240 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3241 pAsm->D.dst.reg = tmp;
3242 nomask_PVSDST(&(pAsm->D.dst));
3243 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3244
3245 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3246 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3247 pAsm->S[0].src.reg = tmp;
3248 noswizzle_PVSSRC(&(pAsm->S[0].src));
3249
3250
3251 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3252 {
3253 return GL_FALSE;
3254 }
3255
3256 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3257 {
3258 return GL_FALSE;
3259 }
3260
3261 if( GL_FALSE == next_ins(pAsm) )
3262 {
3263 return GL_FALSE;
3264 }
3265
3266 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3267
3268 if( GL_FALSE == assemble_dst(pAsm) )
3269 {
3270 return GL_FALSE;
3271 }
3272
3273 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3274 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3275 pAsm->S[0].src.reg = tmp;
3276 noswizzle_PVSSRC(&(pAsm->S[0].src));
3277
3278 if( GL_FALSE == next_ins(pAsm) )
3279 {
3280 return GL_FALSE;
3281 }
3282
3283 return GL_TRUE;
3284 }
3285
3286 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3287 {
3288 BITS tmp1, tmp2, tmp3;
3289
3290 checkop1(pAsm);
3291
3292 tmp1 = gethelpr(pAsm);
3293 tmp2 = gethelpr(pAsm);
3294 tmp3 = gethelpr(pAsm);
3295
3296 // FIXME: The hardware can do fabs() directly on input
3297 // elements, but the compiler doesn't have the
3298 // capability to use that.
3299
3300 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3301
3302 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3303
3304 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3305 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3306 pAsm->D.dst.reg = tmp1;
3307 pAsm->D.dst.writex = 1;
3308
3309 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3310 {
3311 return GL_FALSE;
3312 }
3313
3314 pAsm->S[1].bits = pAsm->S[0].bits;
3315 flipneg_PVSSRC(&(pAsm->S[1].src));
3316
3317 if ( GL_FALSE == next_ins(pAsm) )
3318 {
3319 return GL_FALSE;
3320 }
3321
3322 // Entire algo:
3323 //
3324 // LG2 tmp2.x, tmp1.x
3325 // FLOOR tmp3.x, tmp2.x
3326 // MOV dst.x, tmp3.x
3327 // ADD tmp3.x, tmp2.x, -tmp3.x
3328 // EX2 dst.y, tmp3.x
3329 // MOV dst.z, tmp2.x
3330 // MOV dst.w, 1.0
3331
3332 // LG2 tmp2.x, tmp1.x
3333 // FLOOR tmp3.x, tmp2.x
3334
3335 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3336 pAsm->D.dst.math = 1;
3337
3338 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3339 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3340 pAsm->D.dst.reg = tmp2;
3341 pAsm->D.dst.writex = 1;
3342
3343 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3344 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3345 pAsm->S[0].src.reg = tmp1;
3346
3347 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3348 noneg_PVSSRC(&(pAsm->S[0].src));
3349
3350 if( GL_FALSE == next_ins(pAsm) )
3351 {
3352 return GL_FALSE;
3353 }
3354
3355 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3356
3357 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3358 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3359 pAsm->D.dst.reg = tmp3;
3360 pAsm->D.dst.writex = 1;
3361
3362 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3363 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3364 pAsm->S[0].src.reg = tmp2;
3365
3366 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3367 noneg_PVSSRC(&(pAsm->S[0].src));
3368
3369 if( GL_FALSE == next_ins(pAsm) )
3370 {
3371 return GL_FALSE;
3372 }
3373
3374 // MOV dst.x, tmp3.x
3375
3376 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3377
3378 if( GL_FALSE == assemble_dst(pAsm) )
3379 {
3380 return GL_FALSE;
3381 }
3382
3383 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3384
3385 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3386 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3387 pAsm->S[0].src.reg = tmp3;
3388
3389 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3390 noneg_PVSSRC(&(pAsm->S[0].src));
3391
3392 if( GL_FALSE == next_ins(pAsm) )
3393 {
3394 return GL_FALSE;
3395 }
3396
3397 // ADD tmp3.x, tmp2.x, -tmp3.x
3398 // EX2 dst.y, tmp3.x
3399
3400 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3401
3402 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3403 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3404 pAsm->D.dst.reg = tmp3;
3405 pAsm->D.dst.writex = 1;
3406
3407 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3408 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3409 pAsm->S[0].src.reg = tmp2;
3410
3411 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3412 noneg_PVSSRC(&(pAsm->S[0].src));
3413
3414 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3415 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3416 pAsm->S[1].src.reg = tmp3;
3417
3418 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3419 neg_PVSSRC(&(pAsm->S[1].src));
3420
3421 if( GL_FALSE == next_ins(pAsm) )
3422 {
3423 return GL_FALSE;
3424 }
3425
3426 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3427 pAsm->D.dst.math = 1;
3428
3429 if( GL_FALSE == assemble_dst(pAsm) )
3430 {
3431 return GL_FALSE;
3432 }
3433
3434 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3435
3436 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3437 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3438 pAsm->S[0].src.reg = tmp3;
3439
3440 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3441 noneg_PVSSRC(&(pAsm->S[0].src));
3442
3443 if( GL_FALSE == next_ins(pAsm) )
3444 {
3445 return GL_FALSE;
3446 }
3447
3448 // MOV dst.z, tmp2.x
3449
3450 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3451
3452 if( GL_FALSE == assemble_dst(pAsm) )
3453 {
3454 return GL_FALSE;
3455 }
3456
3457 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3458
3459 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3460 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3461 pAsm->S[0].src.reg = tmp2;
3462
3463 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3464 noneg_PVSSRC(&(pAsm->S[0].src));
3465
3466 if( GL_FALSE == next_ins(pAsm) )
3467 {
3468 return GL_FALSE;
3469 }
3470
3471 // MOV dst.w 1.0
3472
3473 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3474
3475 if( GL_FALSE == assemble_dst(pAsm) )
3476 {
3477 return GL_FALSE;
3478 }
3479
3480 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3481
3482 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3483 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3484 pAsm->S[0].src.reg = tmp1;
3485
3486 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3487 noneg_PVSSRC(&(pAsm->S[0].src));
3488
3489 if( GL_FALSE == next_ins(pAsm) )
3490 {
3491 return GL_FALSE;
3492 }
3493
3494 return GL_TRUE;
3495 }
3496
3497 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3498 {
3499 int tmp, ii;
3500 GLboolean bReplaceDst = GL_FALSE;
3501 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3502
3503 if( GL_FALSE == checkop3(pAsm) )
3504 {
3505 return GL_FALSE;
3506 }
3507
3508 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3509 pAsm->D.dst.op3 = 1;
3510
3511 tmp = (-1);
3512
3513 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3514 { /* TODO : more investigation on MAD src and dst using same register */
3515 for(ii=0; ii<3; ii++)
3516 {
3517 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3518 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3519 {
3520 bReplaceDst = GL_TRUE;
3521 break;
3522 }
3523 }
3524 }
3525 if(0xF != pILInst->DstReg.WriteMask)
3526 { /* OP3 has no support for write mask */
3527 bReplaceDst = GL_TRUE;
3528 }
3529
3530 if(GL_TRUE == bReplaceDst)
3531 {
3532 tmp = gethelpr(pAsm);
3533
3534 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3535 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3536 pAsm->D.dst.reg = tmp;
3537
3538 nomask_PVSDST(&(pAsm->D.dst));
3539 }
3540 else
3541 {
3542 if( GL_FALSE == assemble_dst(pAsm) )
3543 {
3544 return GL_FALSE;
3545 }
3546 }
3547
3548 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3549 {
3550 return GL_FALSE;
3551 }
3552
3553 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3554 {
3555 return GL_FALSE;
3556 }
3557
3558 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3559 {
3560 return GL_FALSE;
3561 }
3562
3563 if ( GL_FALSE == next_ins(pAsm) )
3564 {
3565 return GL_FALSE;
3566 }
3567
3568 if (GL_TRUE == bReplaceDst)
3569 {
3570 if( GL_FALSE == assemble_dst(pAsm) )
3571 {
3572 return GL_FALSE;
3573 }
3574
3575 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3576
3577 //tmp for source
3578 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3579 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3580 pAsm->S[0].src.reg = tmp;
3581
3582 noneg_PVSSRC(&(pAsm->S[0].src));
3583 noswizzle_PVSSRC(&(pAsm->S[0].src));
3584
3585 if( GL_FALSE == next_ins(pAsm) )
3586 {
3587 return GL_FALSE;
3588 }
3589 }
3590
3591 return GL_TRUE;
3592 }
3593
3594 /* LIT dst, src */
3595 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3596 {
3597 unsigned int dstReg;
3598 unsigned int dstType;
3599 unsigned int srcReg;
3600 unsigned int srcType;
3601 checkop1(pAsm);
3602 int tmp = gethelpr(pAsm);
3603
3604 if( GL_FALSE == assemble_dst(pAsm) )
3605 {
3606 return GL_FALSE;
3607 }
3608 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3609 {
3610 return GL_FALSE;
3611 }
3612 dstReg = pAsm->D.dst.reg;
3613 dstType = pAsm->D.dst.rtype;
3614 srcReg = pAsm->S[0].src.reg;
3615 srcType = pAsm->S[0].src.rtype;
3616
3617 /* dst.xw, <- 1.0 */
3618 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3619 pAsm->D.dst.rtype = dstType;
3620 pAsm->D.dst.reg = dstReg;
3621 pAsm->D.dst.writex = 1;
3622 pAsm->D.dst.writey = 0;
3623 pAsm->D.dst.writez = 0;
3624 pAsm->D.dst.writew = 1;
3625 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3626 pAsm->S[0].src.reg = tmp;
3627 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3628 noneg_PVSSRC(&(pAsm->S[0].src));
3629 pAsm->S[0].src.swizzlex = SQ_SEL_1;
3630 pAsm->S[0].src.swizzley = SQ_SEL_1;
3631 pAsm->S[0].src.swizzlez = SQ_SEL_1;
3632 pAsm->S[0].src.swizzlew = SQ_SEL_1;
3633 if( GL_FALSE == next_ins(pAsm) )
3634 {
3635 return GL_FALSE;
3636 }
3637
3638 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3639 {
3640 return GL_FALSE;
3641 }
3642
3643 /* dst.y = max(src.x, 0.0) */
3644 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3645 pAsm->D.dst.rtype = dstType;
3646 pAsm->D.dst.reg = dstReg;
3647 pAsm->D.dst.writex = 0;
3648 pAsm->D.dst.writey = 1;
3649 pAsm->D.dst.writez = 0;
3650 pAsm->D.dst.writew = 0;
3651 pAsm->S[0].src.rtype = srcType;
3652 pAsm->S[0].src.reg = srcReg;
3653 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3654 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3655 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3656 pAsm->S[1].src.reg = tmp;
3657 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3658 noneg_PVSSRC(&(pAsm->S[1].src));
3659 pAsm->S[1].src.swizzlex = SQ_SEL_0;
3660 pAsm->S[1].src.swizzley = SQ_SEL_0;
3661 pAsm->S[1].src.swizzlez = SQ_SEL_0;
3662 pAsm->S[1].src.swizzlew = SQ_SEL_0;
3663 if( GL_FALSE == next_ins(pAsm) )
3664 {
3665 return GL_FALSE;
3666 }
3667
3668 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3669 {
3670 return GL_FALSE;
3671 }
3672
3673 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
3674
3675 /* dst.z = log(src.y) */
3676 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
3677 pAsm->D.dst.math = 1;
3678 pAsm->D.dst.rtype = dstType;
3679 pAsm->D.dst.reg = dstReg;
3680 pAsm->D.dst.writex = 0;
3681 pAsm->D.dst.writey = 0;
3682 pAsm->D.dst.writez = 1;
3683 pAsm->D.dst.writew = 0;
3684 pAsm->S[0].src.rtype = srcType;
3685 pAsm->S[0].src.reg = srcReg;
3686 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3687 if( GL_FALSE == next_ins(pAsm) )
3688 {
3689 return GL_FALSE;
3690 }
3691
3692 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3693 {
3694 return GL_FALSE;
3695 }
3696
3697 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
3698 {
3699 return GL_FALSE;
3700 }
3701
3702 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3703
3704 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3705
3706 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3707 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
3708 pAsm->D.dst.math = 1;
3709 pAsm->D.dst.op3 = 1;
3710 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3711 pAsm->D.dst.reg = tmp;
3712 pAsm->D.dst.writex = 1;
3713 pAsm->D.dst.writey = 0;
3714 pAsm->D.dst.writez = 0;
3715 pAsm->D.dst.writew = 0;
3716
3717 pAsm->S[0].src.rtype = srcType;
3718 pAsm->S[0].src.reg = srcReg;
3719 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3720
3721 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3722 pAsm->S[1].src.reg = dstReg;
3723 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3724 noneg_PVSSRC(&(pAsm->S[1].src));
3725 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
3726 pAsm->S[1].src.swizzley = SQ_SEL_Z;
3727 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3728 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
3729
3730 pAsm->S[2].src.rtype = srcType;
3731 pAsm->S[2].src.reg = srcReg;
3732 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3733
3734 if( GL_FALSE == next_ins(pAsm) )
3735 {
3736 return GL_FALSE;
3737 }
3738
3739 /* dst.z = exp(tmp.x) */
3740 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3741 pAsm->D.dst.math = 1;
3742 pAsm->D.dst.rtype = dstType;
3743 pAsm->D.dst.reg = dstReg;
3744 pAsm->D.dst.writex = 0;
3745 pAsm->D.dst.writey = 0;
3746 pAsm->D.dst.writez = 1;
3747 pAsm->D.dst.writew = 0;
3748
3749 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3750 pAsm->S[0].src.reg = tmp;
3751 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3752 noneg_PVSSRC(&(pAsm->S[0].src));
3753 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3754 pAsm->S[0].src.swizzley = SQ_SEL_X;
3755 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3756 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3757
3758 if( GL_FALSE == next_ins(pAsm) )
3759 {
3760 return GL_FALSE;
3761 }
3762
3763 return GL_TRUE;
3764 }
3765
3766 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3767 {
3768 if( GL_FALSE == checkop2(pAsm) )
3769 {
3770 return GL_FALSE;
3771 }
3772
3773 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3774
3775 if( GL_FALSE == assemble_dst(pAsm) )
3776 {
3777 return GL_FALSE;
3778 }
3779
3780 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3781 {
3782 return GL_FALSE;
3783 }
3784
3785 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3786 {
3787 return GL_FALSE;
3788 }
3789
3790 if( GL_FALSE == next_ins(pAsm) )
3791 {
3792 return GL_FALSE;
3793 }
3794
3795 return GL_TRUE;
3796 }
3797
3798 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3799 {
3800 if( GL_FALSE == checkop2(pAsm) )
3801 {
3802 return GL_FALSE;
3803 }
3804
3805 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3806
3807 if( GL_FALSE == assemble_dst(pAsm) )
3808 {
3809 return GL_FALSE;
3810 }
3811
3812 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3813 {
3814 return GL_FALSE;
3815 }
3816
3817 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3818 {
3819 return GL_FALSE;
3820 }
3821
3822 if( GL_FALSE == next_ins(pAsm) )
3823 {
3824 return GL_FALSE;
3825 }
3826
3827 return GL_TRUE;
3828 }
3829
3830 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3831 {
3832 checkop1(pAsm);
3833
3834 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3835
3836 if (GL_FALSE == assemble_dst(pAsm))
3837 {
3838 return GL_FALSE;
3839 }
3840
3841 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3842 {
3843 return GL_FALSE;
3844 }
3845
3846 if ( GL_FALSE == next_ins(pAsm) )
3847 {
3848 return GL_FALSE;
3849 }
3850
3851 return GL_TRUE;
3852 }
3853
3854 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3855 {
3856 if( GL_FALSE == checkop2(pAsm) )
3857 {
3858 return GL_FALSE;
3859 }
3860
3861 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3862
3863 if( GL_FALSE == assemble_dst(pAsm) )
3864 {
3865 return GL_FALSE;
3866 }
3867
3868 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3869 {
3870 return GL_FALSE;
3871 }
3872
3873 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3874 {
3875 return GL_FALSE;
3876 }
3877
3878 if( GL_FALSE == next_ins(pAsm) )
3879 {
3880 return GL_FALSE;
3881 }
3882
3883 return GL_TRUE;
3884 }
3885
3886 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3887 {
3888 BITS tmp;
3889
3890 checkop1(pAsm);
3891
3892 tmp = gethelpr(pAsm);
3893
3894 // LG2 tmp.x, a.swizzle
3895 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3896 pAsm->D.dst.math = 1;
3897
3898 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3899 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3900 pAsm->D.dst.reg = tmp;
3901 nomask_PVSDST(&(pAsm->D.dst));
3902
3903 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3904 {
3905 return GL_FALSE;
3906 }
3907
3908 if( GL_FALSE == next_ins(pAsm) )
3909 {
3910 return GL_FALSE;
3911 }
3912
3913 // MUL tmp.x, tmp.x, b.swizzle
3914 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3915
3916 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3917 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3918 pAsm->D.dst.reg = tmp;
3919 nomask_PVSDST(&(pAsm->D.dst));
3920
3921 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3922 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3923 pAsm->S[0].src.reg = tmp;
3924 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3925 noneg_PVSSRC(&(pAsm->S[0].src));
3926
3927 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3928 {
3929 return GL_FALSE;
3930 }
3931
3932 if( GL_FALSE == next_ins(pAsm) )
3933 {
3934 return GL_FALSE;
3935 }
3936
3937 // EX2 dst.mask, tmp.x
3938 // EX2 tmp.x, tmp.x
3939 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3940 pAsm->D.dst.math = 1;
3941
3942 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3943 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3944 pAsm->D.dst.reg = tmp;
3945 nomask_PVSDST(&(pAsm->D.dst));
3946
3947 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3948 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3949 pAsm->S[0].src.reg = tmp;
3950 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3951 noneg_PVSSRC(&(pAsm->S[0].src));
3952
3953 if( GL_FALSE == next_ins(pAsm) )
3954 {
3955 return GL_FALSE;
3956 }
3957
3958 // Now replicate result to all necessary channels in destination
3959 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3960
3961 if( GL_FALSE == assemble_dst(pAsm) )
3962 {
3963 return GL_FALSE;
3964 }
3965
3966 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3967 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3968 pAsm->S[0].src.reg = tmp;
3969
3970 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3971 noneg_PVSSRC(&(pAsm->S[0].src));
3972
3973 if( GL_FALSE == next_ins(pAsm) )
3974 {
3975 return GL_FALSE;
3976 }
3977
3978 return GL_TRUE;
3979 }
3980
3981 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3982 {
3983 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3984 }
3985
3986 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3987 {
3988 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3989 }
3990
3991 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3992 {
3993 BITS tmp;
3994
3995 checkop1(pAsm);
3996
3997 tmp = gethelpr(pAsm);
3998 /* tmp.x = src /2*PI */
3999 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4000 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4001 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4002 pAsm->D.dst.reg = tmp;
4003 pAsm->D.dst.writex = 1;
4004
4005 assemble_src(pAsm, 0, -1);
4006
4007 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4008 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4009 pAsm->D2.dst2.literal_slots = 1;
4010 pAsm->C[0].f = 1/(3.1415926535 * 2);
4011 pAsm->C[1].f = 0.0F;
4012
4013 next_ins(pAsm);
4014
4015 // COS dst.x, a.x
4016 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4017 pAsm->D.dst.math = 1;
4018
4019 assemble_dst(pAsm);
4020 /* mask y */
4021 pAsm->D.dst.writey = 0;
4022
4023 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4024 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4025 pAsm->S[0].src.reg = tmp;
4026 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4027 noneg_PVSSRC(&(pAsm->S[0].src));
4028
4029 if ( GL_FALSE == next_ins(pAsm) )
4030 {
4031 return GL_FALSE;
4032 }
4033
4034 // SIN dst.y, a.x
4035 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4036 pAsm->D.dst.math = 1;
4037
4038 assemble_dst(pAsm);
4039 /* mask x */
4040 pAsm->D.dst.writex = 0;
4041
4042 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4043 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4044 pAsm->S[0].src.reg = tmp;
4045 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4046 noneg_PVSSRC(&(pAsm->S[0].src));
4047
4048 if( GL_FALSE == next_ins(pAsm) )
4049 {
4050 return GL_FALSE;
4051 }
4052
4053 return GL_TRUE;
4054 }
4055
4056 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4057 {
4058 if( GL_FALSE == checkop2(pAsm) )
4059 {
4060 return GL_FALSE;
4061 }
4062
4063 pAsm->D.dst.opcode = opcode;
4064 //pAsm->D.dst.math = 1;
4065
4066 if( GL_FALSE == assemble_dst(pAsm) )
4067 {
4068 return GL_FALSE;
4069 }
4070
4071 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4072 {
4073 return GL_FALSE;
4074 }
4075
4076 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4077 {
4078 return GL_FALSE;
4079 }
4080
4081 if( GL_FALSE == next_ins(pAsm) )
4082 {
4083 return GL_FALSE;
4084 }
4085
4086 return GL_TRUE;
4087 }
4088
4089 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4090 {
4091 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4092
4093 pAsm->D.dst.opcode = opcode;
4094 pAsm->D.dst.math = 1;
4095 pAsm->D.dst.predicated = 1;
4096
4097 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4098 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4099 pAsm->D.dst.reg = pAsm->uHelpReg;
4100 pAsm->D.dst.writex = 1;
4101 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4102
4103 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4104 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4105 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
4106 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
4107 noneg_PVSSRC(&(pAsm->S[0].src));
4108
4109 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4110 pAsm->S[1].src.reg = pAsm->uHelpReg;
4111 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4112 noneg_PVSSRC(&(pAsm->S[1].src));
4113 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4114 pAsm->S[1].src.swizzley = SQ_SEL_0;
4115 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4116 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4117
4118 if( GL_FALSE == next_ins(pAsm) )
4119 {
4120 return GL_FALSE;
4121 }
4122
4123 return GL_TRUE;
4124 }
4125
4126 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4127 {
4128 if( GL_FALSE == checkop2(pAsm) )
4129 {
4130 return GL_FALSE;
4131 }
4132
4133 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4134
4135 if( GL_FALSE == assemble_dst(pAsm) )
4136 {
4137 return GL_FALSE;
4138 }
4139
4140 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4141 {
4142 return GL_FALSE;
4143 }
4144
4145 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4146 {
4147 return GL_FALSE;
4148 }
4149
4150 if( GL_FALSE == next_ins(pAsm) )
4151 {
4152 return GL_FALSE;
4153 }
4154
4155 return GL_TRUE;
4156 }
4157
4158 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4159 {
4160 if( GL_FALSE == checkop2(pAsm) )
4161 {
4162 return GL_FALSE;
4163 }
4164
4165 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4166
4167 if( GL_FALSE == assemble_dst(pAsm) )
4168 {
4169 return GL_FALSE;
4170 }
4171
4172 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4173 {
4174 return GL_FALSE;
4175 }
4176
4177 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4178 {
4179 return GL_FALSE;
4180 }
4181
4182 if( GL_FALSE == next_ins(pAsm) )
4183 {
4184 return GL_FALSE;
4185 }
4186
4187 return GL_TRUE;
4188 }
4189
4190 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4191 {
4192 return GL_TRUE;
4193 }
4194
4195 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4196 {
4197 GLboolean src_const;
4198 GLboolean need_barrier = GL_FALSE;
4199
4200 checkop1(pAsm);
4201
4202 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4203 {
4204 case PROGRAM_UNIFORM:
4205 case PROGRAM_CONSTANT:
4206 case PROGRAM_LOCAL_PARAM:
4207 case PROGRAM_ENV_PARAM:
4208 case PROGRAM_STATE_VAR:
4209 src_const = GL_TRUE;
4210 break;
4211 case PROGRAM_TEMPORARY:
4212 case PROGRAM_INPUT:
4213 default:
4214 src_const = GL_FALSE;
4215 break;
4216 }
4217
4218 if (GL_TRUE == src_const)
4219 {
4220 if ( GL_FALSE == mov_temp(pAsm, 0) )
4221 return GL_FALSE;
4222 need_barrier = GL_TRUE;
4223 }
4224
4225 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4226 {
4227 GLuint tmp = gethelpr(pAsm);
4228 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4229 pAsm->D.dst.math = 1;
4230 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4231 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4232 pAsm->D.dst.reg = tmp;
4233 pAsm->D.dst.writew = 1;
4234
4235 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4236 {
4237 return GL_FALSE;
4238 }
4239 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4240 if( GL_FALSE == next_ins(pAsm) )
4241 {
4242 return GL_FALSE;
4243 }
4244
4245 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4246 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4247 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4248 pAsm->D.dst.reg = tmp;
4249 pAsm->D.dst.writex = 1;
4250 pAsm->D.dst.writey = 1;
4251 pAsm->D.dst.writez = 1;
4252 pAsm->D.dst.writew = 0;
4253
4254 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4255 {
4256 return GL_FALSE;
4257 }
4258 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4259 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4260 pAsm->S[1].src.reg = tmp;
4261 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4262
4263 if( GL_FALSE == next_ins(pAsm) )
4264 {
4265 return GL_FALSE;
4266 }
4267
4268 pAsm->aArgSubst[1] = tmp;
4269 need_barrier = GL_TRUE;
4270 }
4271
4272 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4273 {
4274 GLuint tmp1 = gethelpr(pAsm);
4275 GLuint tmp2 = gethelpr(pAsm);
4276
4277 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4278 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4279 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4280 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4281 pAsm->D.dst.reg = tmp1;
4282 nomask_PVSDST(&(pAsm->D.dst));
4283
4284 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4285 {
4286 return GL_FALSE;
4287 }
4288
4289 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4290 {
4291 return GL_FALSE;
4292 }
4293
4294 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4295 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4296
4297 if( GL_FALSE == next_ins(pAsm) )
4298 {
4299 return GL_FALSE;
4300 }
4301
4302 /* tmp1.z = RCP_e(|tmp1.z|) */
4303 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4304 pAsm->D.dst.math = 1;
4305 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4306 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4307 pAsm->D.dst.reg = tmp1;
4308 pAsm->D.dst.writez = 1;
4309
4310 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4311 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4312 pAsm->S[0].src.reg = tmp1;
4313 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4314 pAsm->S[0].src.abs = 1;
4315
4316 next_ins(pAsm);
4317
4318 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4319 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4320 * muladd has no writemask, have to use another temp
4321 */
4322 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4323 pAsm->D.dst.op3 = 1;
4324 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4325 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4326 pAsm->D.dst.reg = tmp2;
4327
4328 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4329 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4330 pAsm->S[0].src.reg = tmp1;
4331 noswizzle_PVSSRC(&(pAsm->S[0].src));
4332 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4333 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4334 pAsm->S[1].src.reg = tmp1;
4335 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4336 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4337 /* immediate c 1.5 */
4338 pAsm->D2.dst2.literal_slots = 1;
4339 pAsm->C[0].f = 1.5F;
4340 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4341 pAsm->S[2].src.reg = tmp1;
4342 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
4343
4344 next_ins(pAsm);
4345
4346 /* tmp1.xy = temp2.xy */
4347 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4348 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4349 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4350 pAsm->D.dst.reg = tmp1;
4351 pAsm->D.dst.writex = 1;
4352 pAsm->D.dst.writey = 1;
4353 pAsm->D.dst.writez = 0;
4354 pAsm->D.dst.writew = 0;
4355
4356 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4357 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4358 pAsm->S[0].src.reg = tmp2;
4359 noswizzle_PVSSRC(&(pAsm->S[0].src));
4360
4361 next_ins(pAsm);
4362 pAsm->aArgSubst[1] = tmp1;
4363 need_barrier = GL_TRUE;
4364
4365 }
4366
4367 switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
4368 {
4369 case OPCODE_DDX:
4370 /* will these need WQM(1) on CF inst ? */
4371 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
4372 break;
4373 case OPCODE_DDY:
4374 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
4375 break;
4376 case OPCODE_TXB:
4377 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
4378 break;
4379 default:
4380 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4381 }
4382
4383 pAsm->is_tex = GL_TRUE;
4384 if ( GL_TRUE == need_barrier )
4385
4386 pAsm->is_tex = GL_TRUE;
4387 if ( GL_TRUE == need_barrier )
4388 {
4389 pAsm->need_tex_barrier = GL_TRUE;
4390 }
4391 // Set src1 to tex unit id
4392 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
4393 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4394
4395 //No sw info from mesa compiler, so hard code here.
4396 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4397 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4398 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4399 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4400
4401 if( GL_FALSE == tex_dst(pAsm) )
4402 {
4403 return GL_FALSE;
4404 }
4405
4406 if( GL_FALSE == tex_src(pAsm) )
4407 {
4408 return GL_FALSE;
4409 }
4410
4411 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4412 {
4413 /* hopefully did swizzles before */
4414 noswizzle_PVSSRC(&(pAsm->S[0].src));
4415 }
4416
4417 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4418 {
4419 /* SAMPLE dst, tmp.yxwy, CUBE */
4420 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4421 pAsm->S[0].src.swizzley = SQ_SEL_X;
4422 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4423 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4424 }
4425
4426 if ( GL_FALSE == next_ins(pAsm) )
4427 {
4428 return GL_FALSE;
4429 }
4430
4431 return GL_TRUE;
4432 }
4433
4434 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4435 {
4436 BITS tmp;
4437
4438 if( GL_FALSE == checkop2(pAsm) )
4439 {
4440 return GL_FALSE;
4441 }
4442
4443 tmp = gethelpr(pAsm);
4444
4445 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4446
4447 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4448 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4449 pAsm->D.dst.reg = tmp;
4450 nomask_PVSDST(&(pAsm->D.dst));
4451
4452 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4453 {
4454 return GL_FALSE;
4455 }
4456
4457 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4458 {
4459 return GL_FALSE;
4460 }
4461
4462 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4463 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4464
4465 if( GL_FALSE == next_ins(pAsm) )
4466 {
4467 return GL_FALSE;
4468 }
4469
4470 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4471 pAsm->D.dst.op3 = 1;
4472
4473 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4474 {
4475 tmp = gethelpr(pAsm);
4476
4477 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4478 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4479 pAsm->D.dst.reg = tmp;
4480
4481 nomask_PVSDST(&(pAsm->D.dst));
4482 }
4483 else
4484 {
4485 if( GL_FALSE == assemble_dst(pAsm) )
4486 {
4487 return GL_FALSE;
4488 }
4489 }
4490
4491 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4492 {
4493 return GL_FALSE;
4494 }
4495
4496 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4497 {
4498 return GL_FALSE;
4499 }
4500
4501 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4502 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4503
4504 // result1 + (neg) result0
4505 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4506 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4507 pAsm->S[2].src.reg = tmp;
4508
4509 neg_PVSSRC(&(pAsm->S[2].src));
4510 noswizzle_PVSSRC(&(pAsm->S[2].src));
4511
4512 if( GL_FALSE == next_ins(pAsm) )
4513 {
4514 return GL_FALSE;
4515 }
4516
4517
4518 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4519 {
4520 if( GL_FALSE == assemble_dst(pAsm) )
4521 {
4522 return GL_FALSE;
4523 }
4524
4525 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4526
4527 // Use tmp as source
4528 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4529 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4530 pAsm->S[0].src.reg = tmp;
4531
4532 noneg_PVSSRC(&(pAsm->S[0].src));
4533 noswizzle_PVSSRC(&(pAsm->S[0].src));
4534
4535 if( GL_FALSE == next_ins(pAsm) )
4536 {
4537 return GL_FALSE;
4538 }
4539 }
4540
4541 return GL_TRUE;
4542 }
4543
4544 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4545 {
4546 return GL_TRUE;
4547 }
4548
4549 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
4550 {
4551 switch (uReason)
4552 {
4553 case FC_PUSH_VPM:
4554 pAsm->CALLSTACK[pAsm->CALLSP].current--;
4555 break;
4556 case FC_PUSH_WQM:
4557 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4558 break;
4559 case FC_LOOP:
4560 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4561 break;
4562 case FC_REP:
4563 /* TODO : for 16 vp asic, should -= 2; */
4564 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
4565 break;
4566 };
4567 }
4568
4569 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
4570 {
4571 if(GL_TRUE == bCheckMaxOnly)
4572 {
4573 switch (uReason)
4574 {
4575 case FC_PUSH_VPM:
4576 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
4577 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4578 {
4579 pAsm->CALLSTACK[pAsm->CALLSP].max =
4580 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
4581 }
4582 break;
4583 case FC_PUSH_WQM:
4584 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
4585 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4586 {
4587 pAsm->CALLSTACK[pAsm->CALLSP].max =
4588 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
4589 }
4590 break;
4591 }
4592 return;
4593 }
4594
4595 switch (uReason)
4596 {
4597 case FC_PUSH_VPM:
4598 pAsm->CALLSTACK[pAsm->CALLSP].current++;
4599 break;
4600 case FC_PUSH_WQM:
4601 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4602 break;
4603 case FC_LOOP:
4604 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4605 break;
4606 case FC_REP:
4607 /* TODO : for 16 vp asic, should += 2; */
4608 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
4609 break;
4610 };
4611
4612 if(pAsm->CALLSTACK[pAsm->CALLSP].current
4613 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4614 {
4615 pAsm->CALLSTACK[pAsm->CALLSP].max =
4616 pAsm->CALLSTACK[pAsm->CALLSP].current;
4617 }
4618 }
4619
4620 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
4621 {
4622 if(GL_FALSE == add_cf_instruction(pAsm) )
4623 {
4624 return GL_FALSE;
4625 }
4626
4627 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4628 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4629 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4630
4631 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4632 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4633 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4634 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4635
4636 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4637
4638 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
4639
4640 return GL_TRUE;
4641 }
4642
4643 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
4644 {
4645 if(GL_FALSE == add_cf_instruction(pAsm) )
4646 {
4647 return GL_FALSE;
4648 }
4649
4650 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4651 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4652 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4653
4654 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4655 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4656 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4657
4658 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4659
4660 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4661 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4662
4663 return GL_TRUE;
4664 }
4665
4666 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
4667 {
4668 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4669
4670 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4671
4672
4673 if(GL_FALSE == add_cf_instruction(pAsm) )
4674 {
4675 return GL_FALSE;
4676 }
4677
4678 if(GL_TRUE != bHasElse)
4679 {
4680 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4681 }
4682 else
4683 {
4684 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4685 }
4686 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4687 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4688
4689 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4690 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4691 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4692 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4693
4694 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4695
4696 pAsm->FCSP++;
4697 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
4698 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4699 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
4700 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4701
4702 #ifndef USE_CF_FOR_POP_AFTER
4703 if(GL_TRUE != bHasElse)
4704 {
4705 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4706 }
4707 #endif /* USE_CF_FOR_POP_AFTER */
4708
4709 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
4710
4711 return GL_TRUE;
4712 }
4713
4714 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
4715 {
4716 if(GL_FALSE == add_cf_instruction(pAsm) )
4717 {
4718 return GL_FALSE;
4719 }
4720
4721 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
4722 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4723 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4724
4725 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4726 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4727 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
4728 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4729
4730 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4731
4732 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
4733 0,
4734 sizeof(R700ControlFlowGenericClause *) );
4735 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
4736 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4737
4738 #ifndef USE_CF_FOR_POP_AFTER
4739 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4740 #endif /* USE_CF_FOR_POP_AFTER */
4741
4742 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
4743
4744 return GL_TRUE;
4745 }
4746
4747 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
4748 {
4749 #ifdef USE_CF_FOR_POP_AFTER
4750 pops(pAsm, 1);
4751 #endif /* USE_CF_FOR_POP_AFTER */
4752
4753 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
4754
4755 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
4756 {
4757 /* no else in between */
4758 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4759 }
4760 else
4761 {
4762 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4763 }
4764
4765 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
4766 {
4767 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
4768 }
4769
4770 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
4771 {
4772 radeon_error("if/endif in shader code are not paired. \n");
4773 return GL_FALSE;
4774 }
4775
4776 pAsm->FCSP--;
4777
4778 decreaseCurrent(pAsm, FC_PUSH_VPM);
4779
4780 return GL_TRUE;
4781 }
4782
4783 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
4784 {
4785 if(GL_FALSE == add_cf_instruction(pAsm) )
4786 {
4787 return GL_FALSE;
4788 }
4789
4790
4791 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4792 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4793 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4794
4795 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4796 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4797 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
4798 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4799
4800 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4801
4802 pAsm->FCSP++;
4803 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
4804 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4805 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
4806 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
4807 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4808
4809 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
4810
4811 return GL_TRUE;
4812 }
4813
4814 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
4815 {
4816 #ifdef USE_CF_FOR_CONTINUE_BREAK
4817
4818 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4819
4820 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4821
4822 unsigned int unFCSP;
4823 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
4824 {
4825 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4826 {
4827 break;
4828 }
4829 }
4830 if(0 == FC_LOOP)
4831 {
4832 radeon_error("Break is not inside loop/endloop pair.\n");
4833 return GL_FALSE;
4834 }
4835
4836 if(GL_FALSE == add_cf_instruction(pAsm) )
4837 {
4838 return GL_FALSE;
4839 }
4840
4841
4842 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4843 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4844 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4845
4846 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4847 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4848 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
4849
4850 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4851
4852 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4853
4854 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
4855 (void *)pAsm->fc_stack[unFCSP].mid,
4856 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
4857 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
4858 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
4859 pAsm->fc_stack[unFCSP].unNumMid++;
4860
4861 if(GL_FALSE == add_cf_instruction(pAsm) )
4862 {
4863 return GL_FALSE;
4864 }
4865
4866 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4867 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4868 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4869
4870 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4871 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4872 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4873
4874 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4875
4876 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4877 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4878
4879 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
4880
4881 #endif //USE_CF_FOR_CONTINUE_BREAK
4882 return GL_TRUE;
4883 }
4884
4885 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
4886 {
4887 #ifdef USE_CF_FOR_CONTINUE_BREAK
4888 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4889
4890 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4891
4892 unsigned int unFCSP;
4893 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
4894 {
4895 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4896 {
4897 break;
4898 }
4899 }
4900 if(0 == FC_LOOP)
4901 {
4902 radeon_error("Continue is not inside loop/endloop pair.\n");
4903 return GL_FALSE;
4904 }
4905
4906 if(GL_FALSE == add_cf_instruction(pAsm) )
4907 {
4908 return GL_FALSE;
4909 }
4910
4911
4912 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4913 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4914 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4915
4916 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4917 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4918 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
4919
4920 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4921
4922 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4923
4924 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
4925 (void *)pAsm->fc_stack[unFCSP].mid,
4926 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
4927 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
4928 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
4929 pAsm->fc_stack[unFCSP].unNumMid++;
4930
4931 if(GL_FALSE == add_cf_instruction(pAsm) )
4932 {
4933 return GL_FALSE;
4934 }
4935
4936 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4937 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4938 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4939
4940 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4941 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4942 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4943
4944 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4945
4946 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4947 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4948
4949 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
4950
4951 #endif /* USE_CF_FOR_CONTINUE_BREAK */
4952
4953 return GL_TRUE;
4954 }
4955
4956 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
4957 {
4958 GLuint i;
4959
4960 if(GL_FALSE == add_cf_instruction(pAsm) )
4961 {
4962 return GL_FALSE;
4963 }
4964
4965
4966 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4967 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4968 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4969
4970 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4971 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4972 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
4973 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4974
4975 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4976
4977 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
4978 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4979
4980 #ifdef USE_CF_FOR_CONTINUE_BREAK
4981 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
4982 {
4983 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
4984 }
4985 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
4986 {
4987 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
4988 }
4989 #endif
4990
4991 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
4992 {
4993 radeon_error("loop/endloop in shader code are not paired. \n");
4994 return GL_FALSE;
4995 }
4996
4997 GLuint unFCSP;
4998 GLuint unIF = 0;
4999 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
5000 {
5001 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5002 {
5003 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5004 {
5005 breakLoopOnFlag(pAsm, unFCSP);
5006 break;
5007 }
5008 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5009 {
5010 unIF++;
5011 }
5012 }
5013 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
5014 {
5015 #ifdef USE_CF_FOR_POP_AFTER
5016 returnOnFlag(pAsm, unIF);
5017 #else
5018 returnOnFlag(pAsm, 0);
5019 #endif /* USE_CF_FOR_POP_AFTER */
5020 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
5021 }
5022 }
5023
5024 pAsm->FCSP--;
5025
5026 decreaseCurrent(pAsm, FC_LOOP);
5027
5028 return GL_TRUE;
5029 }
5030
5031 void add_return_inst(r700_AssemblerBase *pAsm)
5032 {
5033 if(GL_FALSE == add_cf_instruction(pAsm) )
5034 {
5035 return GL_FALSE;
5036 }
5037 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5038 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5039 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5040 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5041
5042 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5043 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5044 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
5045 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5046
5047 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5048 }
5049
5050 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
5051 {
5052 /* Put in sub */
5053 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
5054 {
5055 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
5056 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
5057 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
5058 if(NULL == pAsm->subs)
5059 {
5060 return GL_FALSE;
5061 }
5062 pAsm->unSubArraySize += 10;
5063 }
5064
5065 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
5066 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
5067 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
5068 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
5069
5070 pAsm->CALLSP++;
5071 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
5072 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
5073 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5074 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5075 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
5076 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
5077 SetActiveCFlist(pAsm->pR700Shader,
5078 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5079
5080 pAsm->unSubArrayPointer++;
5081
5082 /* start sub */
5083 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5084
5085 pAsm->FCSP++;
5086 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
5087
5088 checkStackDepth(pAsm, FC_REP, GL_FALSE);
5089
5090 return GL_TRUE;
5091 }
5092
5093 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5094 {
5095 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
5096 {
5097 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5098 return GL_FALSE;
5099 }
5100
5101 /* copy max to sub structure */
5102 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
5103 = pAsm->CALLSTACK[pAsm->CALLSP].max;
5104
5105 decreaseCurrent(pAsm, FC_REP);
5106
5107 pAsm->CALLSP--;
5108 SetActiveCFlist(pAsm->pR700Shader,
5109 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5110
5111 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5112
5113 pAsm->FCSP--;
5114
5115 return GL_TRUE;
5116 }
5117
5118 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5119 {
5120 GLuint unIF = 0;
5121
5122 if(pAsm->CALLSP > 0)
5123 { /* in sub */
5124 GLuint unFCSP;
5125 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5126 {
5127 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5128 {
5129 setRetInLoopFlag(pAsm, SQ_SEL_1);
5130 breakLoopOnFlag(pAsm, unFCSP);
5131 pAsm->unCFflags |= LOOPRET_FLAGS;
5132
5133 return GL_TRUE;
5134 }
5135 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5136 {
5137 unIF++;
5138 }
5139 }
5140 }
5141
5142 #ifdef USE_CF_FOR_POP_AFTER
5143 if(unIF > 0)
5144 {
5145 pops(pAsm, unIF);
5146 }
5147 #endif /* USE_CF_FOR_POP_AFTER */
5148
5149 add_return_inst(pAsm);
5150
5151 return GL_TRUE;
5152 }
5153
5154 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5155 GLint nILindex,
5156 GLuint uiIL_Shift,
5157 GLuint uiNumberInsts,
5158 struct prog_instruction *pILInst,
5159 PRESUB_DESC * pPresubDesc)
5160 {
5161 GLint uiIL_Offset;
5162
5163 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5164
5165 if(GL_FALSE == add_cf_instruction(pAsm) )
5166 {
5167 return GL_FALSE;
5168 }
5169
5170 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5171 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5172 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5173 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5174
5175 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5176 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5177 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5178 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5179
5180 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5181
5182 /* Put in caller */
5183 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5184 {
5185 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5186 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5187 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5188 if(NULL == pAsm->callers)
5189 {
5190 return GL_FALSE;
5191 }
5192 pAsm->unCallerArraySize += 10;
5193 }
5194
5195 uiIL_Offset = nILindex + uiIL_Shift;
5196 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
5197 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5198
5199 pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
5200 pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
5201
5202 pAsm->unCallerArrayPointer++;
5203
5204 int j;
5205 GLuint max;
5206 GLuint unSubID;
5207 GLboolean bRet;
5208 for(j=0; j<pAsm->unSubArrayPointer; j++)
5209 {
5210 if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
5211 { /* compiled before */
5212
5213 max = pAsm->subs[j].unStackDepthMax
5214 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5215 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5216 {
5217 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5218 }
5219
5220 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5221 return GL_TRUE;
5222 }
5223 }
5224
5225 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5226 unSubID = pAsm->unSubArrayPointer;
5227
5228 bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
5229
5230 if(GL_TRUE == bRet)
5231 {
5232 max = pAsm->subs[unSubID].unStackDepthMax
5233 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5234 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5235 {
5236 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5237 }
5238
5239 pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
5240 }
5241
5242 return bRet;
5243 }
5244
5245 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5246 {
5247 GLfloat fLiteral[2] = {0.1, 0.0};
5248
5249 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5250 pAsm->D.dst.op3 = 0;
5251 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5252 pAsm->D.dst.reg = pAsm->flag_reg_index;
5253 pAsm->D.dst.writex = 1;
5254 pAsm->D.dst.writey = 0;
5255 pAsm->D.dst.writez = 0;
5256 pAsm->D.dst.writew = 0;
5257 pAsm->D2.dst2.literal_slots = 1;
5258 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5259 pAsm->D.dst.predicated = 0;
5260 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5261 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5262 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5263 #if 0
5264 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5265 //pAsm->S[0].src.reg = 0;
5266 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5267 noneg_PVSSRC(&(pAsm->S[0].src));
5268 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5269 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5270 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5271 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5272
5273 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5274 {
5275 return GL_FALSE;
5276 }
5277 #else
5278 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5279 pAsm->S[0].src.reg = 0;
5280 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5281 noneg_PVSSRC(&(pAsm->S[0].src));
5282 pAsm->S[0].src.swizzlex = flagValue;
5283 pAsm->S[0].src.swizzley = flagValue;
5284 pAsm->S[0].src.swizzlez = flagValue;
5285 pAsm->S[0].src.swizzlew = flagValue;
5286
5287 if( GL_FALSE == next_ins(pAsm) )
5288 {
5289 return GL_FALSE;
5290 }
5291 #endif
5292
5293 return GL_TRUE;
5294 }
5295
5296 GLboolean testFlag(r700_AssemblerBase *pAsm)
5297 {
5298 GLfloat fLiteral[2] = {0.1, 0.0};
5299
5300 //Test flag
5301 GLuint tmp = gethelpr(pAsm);
5302 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5303
5304 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5305 pAsm->D.dst.math = 1;
5306 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5307 pAsm->D.dst.reg = tmp;
5308 pAsm->D.dst.writex = 1;
5309 pAsm->D.dst.writey = 0;
5310 pAsm->D.dst.writez = 0;
5311 pAsm->D.dst.writew = 0;
5312 pAsm->D2.dst2.literal_slots = 1;
5313 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5314 pAsm->D.dst.predicated = 1;
5315 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5316
5317 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5318 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5319 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5320 noneg_PVSSRC(&(pAsm->S[0].src));
5321 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5322 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5323 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5324 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5325 #if 0
5326 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5327 //pAsm->S[1].src.reg = 0;
5328 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5329 noneg_PVSSRC(&(pAsm->S[1].src));
5330 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5331 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5332 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5333 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5334
5335 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5336 {
5337 return GL_FALSE;
5338 }
5339 #else
5340 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5341 pAsm->S[1].src.reg = 0;
5342 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5343 noneg_PVSSRC(&(pAsm->S[1].src));
5344 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5345 pAsm->S[1].src.swizzley = SQ_SEL_1;
5346 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5347 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5348
5349 if( GL_FALSE == next_ins(pAsm) )
5350 {
5351 return GL_FALSE;
5352 }
5353 #endif
5354
5355 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5356
5357 return GL_TRUE;
5358 }
5359
5360 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
5361 {
5362 testFlag(pAsm);
5363 jumpToOffest(pAsm, 1, 4);
5364 setRetInLoopFlag(pAsm, SQ_SEL_0);
5365 pops(pAsm, unIF + 1);
5366 add_return_inst(pAsm);
5367
5368 return GL_TRUE;
5369 }
5370
5371 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5372 {
5373 testFlag(pAsm);
5374
5375 //break
5376 if(GL_FALSE == add_cf_instruction(pAsm) )
5377 {
5378 return GL_FALSE;
5379 }
5380
5381 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5382 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5383 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5384
5385 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5386 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5387 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5388 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5389
5390 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5391
5392 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5393 (void *)pAsm->fc_stack[unFCSP].mid,
5394 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5395 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5396 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5397 pAsm->fc_stack[unFCSP].unNumMid++;
5398
5399 pops(pAsm, 1);
5400
5401 return GL_TRUE;
5402 }
5403
5404 GLboolean AssembleInstr(GLuint uiFirstInst,
5405 GLuint uiIL_Shift,
5406 GLuint uiNumberInsts,
5407 struct prog_instruction *pILInst,
5408 r700_AssemblerBase *pR700AsmCode)
5409 {
5410 GLuint i;
5411
5412 pR700AsmCode->pILInst = pILInst;
5413 for(i=uiFirstInst; i<uiNumberInsts; i++)
5414 {
5415 pR700AsmCode->uiCurInst = i;
5416
5417 #ifndef USE_CF_FOR_CONTINUE_BREAK
5418 if(OPCODE_BRK == pILInst[i+1].Opcode)
5419 {
5420 switch(pILInst[i].Opcode)
5421 {
5422 case OPCODE_SLE:
5423 pILInst[i].Opcode = OPCODE_SGT;
5424 break;
5425 case OPCODE_SLT:
5426 pILInst[i].Opcode = OPCODE_SGE;
5427 break;
5428 case OPCODE_SGE:
5429 pILInst[i].Opcode = OPCODE_SLT;
5430 break;
5431 case OPCODE_SGT:
5432 pILInst[i].Opcode = OPCODE_SLE;
5433 break;
5434 case OPCODE_SEQ:
5435 pILInst[i].Opcode = OPCODE_SNE;
5436 break;
5437 case OPCODE_SNE:
5438 pILInst[i].Opcode = OPCODE_SEQ;
5439 break;
5440 default:
5441 break;
5442 }
5443 }
5444 #endif
5445 if(pILInst[i].CondUpdate == 1)
5446 {
5447 /* remember dest register used for cond evaluation */
5448 /* XXX also handle PROGRAM_OUTPUT registers here? */
5449 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
5450 }
5451
5452 switch (pILInst[i].Opcode)
5453 {
5454 case OPCODE_ABS:
5455 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5456 return GL_FALSE;
5457 break;
5458 case OPCODE_ADD:
5459 case OPCODE_SUB:
5460 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5461 return GL_FALSE;
5462 break;
5463
5464 case OPCODE_ARL:
5465 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5466 return GL_FALSE;
5467 break;
5468 case OPCODE_ARR:
5469 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5470 //if ( GL_FALSE == assemble_BAD("ARR") )
5471 return GL_FALSE;
5472 break;
5473
5474 case OPCODE_CMP:
5475 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5476 return GL_FALSE;
5477 break;
5478 case OPCODE_COS:
5479 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
5480 return GL_FALSE;
5481 break;
5482
5483 case OPCODE_DP3:
5484 case OPCODE_DP4:
5485 case OPCODE_DPH:
5486 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5487 return GL_FALSE;
5488 break;
5489
5490 case OPCODE_DST:
5491 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5492 return GL_FALSE;
5493 break;
5494
5495 case OPCODE_EX2:
5496 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5497 return GL_FALSE;
5498 break;
5499 case OPCODE_EXP:
5500 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5501 return GL_FALSE;
5502 break;
5503
5504 case OPCODE_FLR:
5505 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5506 return GL_FALSE;
5507 break;
5508 //case OP_FLR_INT: ;
5509
5510 // if ( GL_FALSE == assemble_FLR_INT() )
5511 // return GL_FALSE;
5512 // break;
5513
5514 case OPCODE_FRC:
5515 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5516 return GL_FALSE;
5517 break;
5518
5519 case OPCODE_KIL:
5520 case OPCODE_KIL_NV:
5521 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
5522 return GL_FALSE;
5523 break;
5524 case OPCODE_LG2:
5525 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5526 return GL_FALSE;
5527 break;
5528 case OPCODE_LIT:
5529 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5530 return GL_FALSE;
5531 break;
5532 case OPCODE_LRP:
5533 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5534 return GL_FALSE;
5535 break;
5536 case OPCODE_LOG:
5537 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5538 return GL_FALSE;
5539 break;
5540
5541 case OPCODE_MAD:
5542 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5543 return GL_FALSE;
5544 break;
5545 case OPCODE_MAX:
5546 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5547 return GL_FALSE;
5548 break;
5549 case OPCODE_MIN:
5550 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5551 return GL_FALSE;
5552 break;
5553
5554 case OPCODE_MOV:
5555 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5556 return GL_FALSE;
5557 break;
5558 case OPCODE_MUL:
5559 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5560 return GL_FALSE;
5561 break;
5562
5563 case OPCODE_NOISE1:
5564 {
5565 callPreSub(pR700AsmCode,
5566 GLSL_NOISE1,
5567 &noise1_presub,
5568 pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
5569 1);
5570 radeon_error("noise1: not yet supported shader instruction\n");
5571 };
5572 break;
5573 case OPCODE_NOISE2:
5574 radeon_error("noise2: not yet supported shader instruction\n");
5575 break;
5576 case OPCODE_NOISE3:
5577 radeon_error("noise3: not yet supported shader instruction\n");
5578 break;
5579 case OPCODE_NOISE4:
5580 radeon_error("noise4: not yet supported shader instruction\n");
5581 break;
5582
5583 case OPCODE_POW:
5584 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5585 return GL_FALSE;
5586 break;
5587 case OPCODE_RCP:
5588 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5589 return GL_FALSE;
5590 break;
5591 case OPCODE_RSQ:
5592 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5593 return GL_FALSE;
5594 break;
5595 case OPCODE_SIN:
5596 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
5597 return GL_FALSE;
5598 break;
5599 case OPCODE_SCS:
5600 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5601 return GL_FALSE;
5602 break;
5603
5604 case OPCODE_SEQ:
5605 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
5606 {
5607 return GL_FALSE;
5608 }
5609 break;
5610
5611 case OPCODE_SGT:
5612 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5613 {
5614 return GL_FALSE;
5615 }
5616 break;
5617
5618 case OPCODE_SGE:
5619 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
5620 {
5621 return GL_FALSE;
5622 }
5623 break;
5624
5625 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
5626 case OPCODE_SLT:
5627 {
5628 struct prog_src_register SrcRegSave[2];
5629 SrcRegSave[0] = pILInst[i].SrcReg[0];
5630 SrcRegSave[1] = pILInst[i].SrcReg[1];
5631 pILInst[i].SrcReg[0] = SrcRegSave[1];
5632 pILInst[i].SrcReg[1] = SrcRegSave[0];
5633 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5634 {
5635 pILInst[i].SrcReg[0] = SrcRegSave[0];
5636 pILInst[i].SrcReg[1] = SrcRegSave[1];
5637 return GL_FALSE;
5638 }
5639 pILInst[i].SrcReg[0] = SrcRegSave[0];
5640 pILInst[i].SrcReg[1] = SrcRegSave[1];
5641 }
5642 break;
5643
5644 case OPCODE_SLE:
5645 {
5646 struct prog_src_register SrcRegSave[2];
5647 SrcRegSave[0] = pILInst[i].SrcReg[0];
5648 SrcRegSave[1] = pILInst[i].SrcReg[1];
5649 pILInst[i].SrcReg[0] = SrcRegSave[1];
5650 pILInst[i].SrcReg[1] = SrcRegSave[0];
5651 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
5652 {
5653 pILInst[i].SrcReg[0] = SrcRegSave[0];
5654 pILInst[i].SrcReg[1] = SrcRegSave[1];
5655 return GL_FALSE;
5656 }
5657 pILInst[i].SrcReg[0] = SrcRegSave[0];
5658 pILInst[i].SrcReg[1] = SrcRegSave[1];
5659 }
5660 break;
5661
5662 case OPCODE_SNE:
5663 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
5664 {
5665 return GL_FALSE;
5666 }
5667 break;
5668
5669 //case OP_STP:
5670 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5671 // return GL_FALSE;
5672 // break;
5673
5674 case OPCODE_SWZ:
5675 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5676 {
5677 return GL_FALSE;
5678 }
5679 else
5680 {
5681 if( (i+1)<uiNumberInsts )
5682 {
5683 if(OPCODE_END != pILInst[i+1].Opcode)
5684 {
5685 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
5686 {
5687 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
5688 }
5689 }
5690 }
5691 }
5692 break;
5693 case OPCODE_DDX:
5694 case OPCODE_DDY:
5695 case OPCODE_TEX:
5696 case OPCODE_TXB:
5697 case OPCODE_TXP:
5698 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
5699 return GL_FALSE;
5700 break;
5701
5702 case OPCODE_TRUNC:
5703 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
5704 return GL_FALSE;
5705 break;
5706
5707 case OPCODE_XPD:
5708 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
5709 return GL_FALSE;
5710 break;
5711
5712 case OPCODE_IF :
5713 {
5714 GLboolean bHasElse = GL_FALSE;
5715
5716 if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
5717 {
5718 bHasElse = GL_TRUE;
5719 }
5720
5721 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
5722 {
5723 return GL_FALSE;
5724 }
5725 }
5726 break;
5727
5728 case OPCODE_ELSE :
5729 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
5730 return GL_FALSE;
5731 break;
5732
5733 case OPCODE_ENDIF:
5734 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
5735 return GL_FALSE;
5736 break;
5737
5738 case OPCODE_BGNLOOP:
5739 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
5740 {
5741 return GL_FALSE;
5742 }
5743 break;
5744
5745 case OPCODE_BRK:
5746 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
5747 {
5748 return GL_FALSE;
5749 }
5750 break;
5751
5752 case OPCODE_CONT:
5753 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
5754 {
5755 return GL_FALSE;
5756 }
5757 break;
5758
5759 case OPCODE_ENDLOOP:
5760 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
5761 {
5762 return GL_FALSE;
5763 }
5764 break;
5765
5766 case OPCODE_BGNSUB:
5767 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
5768 {
5769 return GL_FALSE;
5770 }
5771 break;
5772
5773 case OPCODE_RET:
5774 if( GL_FALSE == assemble_RET(pR700AsmCode) )
5775 {
5776 return GL_FALSE;
5777 }
5778 break;
5779
5780 case OPCODE_CAL:
5781 if( GL_FALSE == assemble_CAL(pR700AsmCode,
5782 pILInst[i].BranchTarget,
5783 uiIL_Shift,
5784 uiNumberInsts,
5785 pILInst,
5786 NULL) )
5787 {
5788 return GL_FALSE;
5789 }
5790 break;
5791
5792 //case OPCODE_EXPORT:
5793 // if ( GL_FALSE == assemble_EXPORT() )
5794 // return GL_FALSE;
5795 // break;
5796
5797 case OPCODE_ENDSUB:
5798 return assemble_ENDSUB(pR700AsmCode);
5799
5800 case OPCODE_END:
5801 //pR700AsmCode->uiCurInst = i;
5802 //This is to remaind that if in later exoort there is depth/stencil
5803 //export, we need a mov to re-arrange DST channel, where using a
5804 //psuedo inst, we will use this end inst to do it.
5805 return GL_TRUE;
5806
5807 default:
5808 radeon_error("internal: unknown instruction\n");
5809 return GL_FALSE;
5810 }
5811 }
5812
5813 return GL_TRUE;
5814 }
5815
5816 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
5817 {
5818 setRetInLoopFlag(pAsm, SQ_SEL_0);
5819 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5820 return GL_TRUE;
5821 }
5822
5823 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
5824 {
5825 GLuint i;
5826 GLuint unCFoffset;
5827 TypedShaderList * plstCFmain;
5828 TypedShaderList * plstCFsub;
5829
5830 R700ShaderInstruction * pInst;
5831 R700ControlFlowGenericClause * pCFInst;
5832
5833 R700ControlFlowALUClause * pCF_ALU;
5834 R700ALUInstruction * pALU;
5835 GLuint unConstOffset = 0;
5836 GLuint unRegOffset;
5837 GLuint unMinRegIndex;
5838
5839 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
5840
5841 /* remove flags init if they are not used */
5842 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
5843 {
5844 R700ControlFlowALUClause * pCF_ALU;
5845 pInst = plstCFmain->pHead;
5846 while(pInst)
5847 {
5848 if(SIT_CF_ALU == pInst->m_ShaderInstType)
5849 {
5850 pCF_ALU = (R700ControlFlowALUClause *)pInst;
5851 if(0 == pCF_ALU->m_Word1.f.count)
5852 {
5853 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
5854 }
5855 else
5856 {
5857 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
5858
5859 pALU->m_pLinkedALUClause = NULL;
5860 pALU = (R700ALUInstruction *)(pALU->pNextInst);
5861 pALU->m_pLinkedALUClause = pCF_ALU;
5862 pCF_ALU->m_pLinkedALUInstruction = pALU;
5863
5864 pCF_ALU->m_Word1.f.count--;
5865 }
5866 break;
5867 }
5868 pInst = pInst->pNextInst;
5869 };
5870 }
5871
5872 if(pAsm->CALLSTACK[0].max > 0)
5873 {
5874 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
5875 }
5876
5877 if(0 == pAsm->unSubArrayPointer)
5878 {
5879 return GL_TRUE;
5880 }
5881
5882 unCFoffset = plstCFmain->uNumOfNode;
5883
5884 if(NULL != pILProg->Parameters)
5885 {
5886 unConstOffset = pILProg->Parameters->NumParameters;
5887 }
5888
5889 /* Reloc subs */
5890 for(i=0; i<pAsm->unSubArrayPointer; i++)
5891 {
5892 pAsm->subs[i].unCFoffset = unCFoffset;
5893 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
5894
5895 pInst = plstCFsub->pHead;
5896
5897 /* reloc instructions */
5898 while(pInst)
5899 {
5900 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
5901 {
5902 pCFInst = (R700ControlFlowGenericClause *)pInst;
5903
5904 switch (pCFInst->m_Word1.f.cf_inst)
5905 {
5906 case SQ_CF_INST_POP:
5907 case SQ_CF_INST_JUMP:
5908 case SQ_CF_INST_ELSE:
5909 case SQ_CF_INST_LOOP_END:
5910 case SQ_CF_INST_LOOP_START:
5911 case SQ_CF_INST_LOOP_START_NO_AL:
5912 case SQ_CF_INST_LOOP_CONTINUE:
5913 case SQ_CF_INST_LOOP_BREAK:
5914 pCFInst->m_Word0.f.addr += unCFoffset;
5915 break;
5916 default:
5917 break;
5918 }
5919 }
5920
5921 pInst->m_uIndex += unCFoffset;
5922
5923 pInst = pInst->pNextInst;
5924 };
5925
5926 if(NULL != pAsm->subs[i].pPresubDesc)
5927 {
5928 GLuint uNumSrc;
5929
5930 unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
5931 unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
5932 unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
5933
5934 pInst = plstCFsub->pHead;
5935 while(pInst)
5936 {
5937 if(SIT_CF_ALU == pInst->m_ShaderInstType)
5938 {
5939 pCF_ALU = (R700ControlFlowALUClause *)pInst;
5940
5941 pALU = pCF_ALU->m_pLinkedALUInstruction;
5942 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
5943 {
5944 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
5945
5946 if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
5947 {
5948 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
5949 }
5950 else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
5951 {
5952 pALU->m_Word0.f.src0_sel += unConstOffset;
5953 }
5954
5955 if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
5956 >= SQ_OP3_INST_MUL_LIT )
5957 { /* op3 : 3 srcs */
5958 if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
5959 {
5960 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
5961 }
5962 else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
5963 {
5964 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
5965 }
5966 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
5967 {
5968 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
5969 }
5970 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
5971 {
5972 pALU->m_Word0.f.src1_sel += unConstOffset;
5973 }
5974 }
5975 else
5976 {
5977 if(pAsm->bR6xx)
5978 {
5979 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
5980 }
5981 else
5982 {
5983 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
5984 }
5985 if(2 == uNumSrc)
5986 { /* 2 srcs */
5987 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
5988 {
5989 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
5990 }
5991 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
5992 {
5993 pALU->m_Word0.f.src1_sel += unConstOffset;
5994 }
5995 }
5996 }
5997 pALU = (R700ALUInstruction*)(pALU->pNextInst);
5998 }
5999 }
6000 pInst = pInst->pNextInst;
6001 };
6002 }
6003
6004 /* Put sub into main */
6005 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
6006 plstCFmain->pTail = plstCFsub->pTail;
6007 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
6008
6009 unCFoffset += plstCFsub->uNumOfNode;
6010 }
6011
6012 /* reloc callers */
6013 for(i=0; i<pAsm->unCallerArrayPointer; i++)
6014 {
6015 pAsm->callers[i].cf_ptr->m_Word0.f.addr
6016 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
6017
6018 if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
6019 {
6020 unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
6021 unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
6022
6023 if(NULL != pAsm->callers[i].prelude_cf_ptr)
6024 {
6025 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
6026 pALU = pCF_ALU->m_pLinkedALUInstruction;
6027 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6028 {
6029 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
6030 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6031 }
6032 }
6033 if(NULL != pAsm->callers[i].finale_cf_ptr)
6034 {
6035 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
6036 pALU = pCF_ALU->m_pLinkedALUInstruction;
6037 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6038 {
6039 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
6040 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6041 }
6042 }
6043 }
6044 }
6045
6046 return GL_TRUE;
6047 }
6048
6049 GLboolean callPreSub(r700_AssemblerBase* pAsm,
6050 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
6051 COMPILED_SUB * pCompiledSub,
6052 GLshort uOutReg,
6053 GLshort uNumValidSrc)
6054 {
6055 /* save assemble context */
6056 GLuint starting_temp_register_number_save;
6057 GLuint number_used_registers_save;
6058 GLuint uFirstHelpReg_save;
6059 GLuint uHelpReg_save;
6060 GLuint uiCurInst_save;
6061 struct prog_instruction *pILInst_save;
6062 PRESUB_DESC * pPresubDesc;
6063 GLboolean bRet;
6064 int i;
6065
6066 R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
6067
6068 /* copy srcs to presub inputs */
6069 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6070 for(i=0; i<uNumValidSrc; i++)
6071 {
6072 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6073 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6074 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6075 pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
6076 pAsm->D.dst.writex = 1;
6077 pAsm->D.dst.writey = 1;
6078 pAsm->D.dst.writez = 1;
6079 pAsm->D.dst.writew = 1;
6080
6081 if( GL_FALSE == assemble_src(pAsm, i, 0) )
6082 {
6083 return GL_FALSE;
6084 }
6085
6086 next_ins(pAsm);
6087 }
6088 if(uNumValidSrc > 0)
6089 {
6090 prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
6091 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6092 }
6093
6094 /* browse thro existing presubs. */
6095 for(i=0; i<pAsm->unNumPresub; i++)
6096 {
6097 if(pAsm->presubs[i].sptSigniture == scriptSigniture)
6098 {
6099 break;
6100 }
6101 }
6102
6103 if(i == pAsm->unNumPresub)
6104 { /* not loaded yet */
6105 /* save assemble context */
6106 number_used_registers_save = pAsm->number_used_registers;
6107 uFirstHelpReg_save = pAsm->uFirstHelpReg;
6108 uHelpReg_save = pAsm->uHelpReg;
6109 starting_temp_register_number_save = pAsm->starting_temp_register_number;
6110 pILInst_save = pAsm->pILInst;
6111 uiCurInst_save = pAsm->uiCurInst;
6112
6113 /* alloc in presub */
6114 if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
6115 {
6116 pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
6117 sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
6118 sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
6119 if(NULL == pAsm->presubs)
6120 {
6121 radeon_error("No memeory to allocate built in shader function description structures. \n");
6122 return GL_FALSE;
6123 }
6124 pAsm->unPresubArraySize += 4;
6125 }
6126
6127 pPresubDesc = &(pAsm->presubs[i]);
6128 pPresubDesc->sptSigniture = scriptSigniture;
6129
6130 /* constants offsets need to be final resolved at reloc. */
6131 if(0 == pAsm->unNumPresub)
6132 {
6133 pPresubDesc->unConstantsStart = 0;
6134 }
6135 else
6136 {
6137 pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
6138 + pAsm->presubs[i-1].pCompiledSub->NumParameters;
6139 }
6140
6141 pPresubDesc->pCompiledSub = pCompiledSub;
6142
6143 pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
6144 pPresubDesc->maxStartReg = uFirstHelpReg_save;
6145 pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
6146
6147 pAsm->unNumPresub++;
6148
6149 /* setup new assemble context */
6150 pAsm->starting_temp_register_number = 0;
6151 pAsm->number_used_registers = pCompiledSub->NumTemporaries;
6152 pAsm->uFirstHelpReg = pAsm->number_used_registers;
6153 pAsm->uHelpReg = pAsm->uFirstHelpReg;
6154
6155 bRet = assemble_CAL(pAsm,
6156 0,
6157 pPresubDesc->subIL_Shift,
6158 pCompiledSub->NumInstructions,
6159 pCompiledSub->Instructions,
6160 pPresubDesc);
6161
6162
6163 pPresubDesc->number_used_registers = pAsm->number_used_registers;
6164
6165 /* restore assemble context */
6166 pAsm->number_used_registers = number_used_registers_save;
6167 pAsm->uFirstHelpReg = uFirstHelpReg_save;
6168 pAsm->uHelpReg = uHelpReg_save;
6169 pAsm->starting_temp_register_number = starting_temp_register_number_save;
6170 pAsm->pILInst = pILInst_save;
6171 pAsm->uiCurInst = uiCurInst_save;
6172 }
6173 else
6174 { /* was loaded */
6175 pPresubDesc = &(pAsm->presubs[i]);
6176
6177 bRet = assemble_CAL(pAsm,
6178 0,
6179 pPresubDesc->subIL_Shift,
6180 pCompiledSub->NumInstructions,
6181 pCompiledSub->Instructions,
6182 pPresubDesc);
6183 }
6184
6185 if(GL_FALSE == bRet)
6186 {
6187 radeon_error("Shader presub assemble failed. \n");
6188 }
6189 else
6190 {
6191 /* copy presub output to real dst */
6192 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6193 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6194
6195 if( GL_FALSE == assemble_dst(pAsm) )
6196 {
6197 return GL_FALSE;
6198 }
6199
6200 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6201 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
6202 pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
6203 pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
6204 pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
6205 pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
6206 pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
6207
6208 next_ins(pAsm);
6209
6210 pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
6211 pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
6212 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6213 }
6214
6215 if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
6216 {
6217 pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
6218 }
6219 if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
6220 {
6221 pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
6222 }
6223
6224 return bRet;
6225 }
6226
6227 GLboolean Process_Export(r700_AssemblerBase* pAsm,
6228 GLuint type,
6229 GLuint export_starting_index,
6230 GLuint export_count,
6231 GLuint starting_register_number,
6232 GLboolean is_depth_export)
6233 {
6234 unsigned char ucWriteMask;
6235
6236 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
6237 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
6238
6239 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
6240
6241 switch (type)
6242 {
6243 case SQ_EXPORT_PIXEL:
6244 if(GL_TRUE == is_depth_export)
6245 {
6246 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
6247 }
6248 else
6249 {
6250 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
6251 }
6252 break;
6253
6254 case SQ_EXPORT_POS:
6255 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
6256 break;
6257
6258 case SQ_EXPORT_PARAM:
6259 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
6260 break;
6261
6262 default:
6263 radeon_error("Unknown export type: %d\n", type);
6264 return GL_FALSE;
6265 break;
6266 }
6267
6268 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
6269
6270 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
6271 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
6272 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
6273
6274 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
6275 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
6276 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6277 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
6278 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6279 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
6280
6281 if (export_count == 1)
6282 {
6283 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
6284 /* exports Z as a float into Red channel */
6285 if (GL_TRUE == is_depth_export)
6286 ucWriteMask = 0x1;
6287
6288 if( (ucWriteMask & 0x1) != 0)
6289 {
6290 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6291 }
6292 else
6293 {
6294 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
6295 }
6296 if( ((ucWriteMask>>1) & 0x1) != 0)
6297 {
6298 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6299 }
6300 else
6301 {
6302 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
6303 }
6304 if( ((ucWriteMask>>2) & 0x1) != 0)
6305 {
6306 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6307 }
6308 else
6309 {
6310 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
6311 }
6312 if( ((ucWriteMask>>3) & 0x1) != 0)
6313 {
6314 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6315 }
6316 else
6317 {
6318 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
6319 }
6320 }
6321 else
6322 {
6323 // This should only be used if all components for all registers have been written
6324 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6325 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6326 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6327 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6328 }
6329
6330 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
6331
6332 return GL_TRUE;
6333 }
6334
6335 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
6336 {
6337 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
6338 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
6339
6340 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6341
6342 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6343
6344 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6345 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6346 pAsm->D.dst.reg = pAsm->depth_export_register_number;
6347
6348 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
6349
6350 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6351 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6352 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
6353
6354 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
6355
6356 noneg_PVSSRC(&(pAsm->S[0].src));
6357
6358 if( GL_FALSE == next_ins(pAsm) )
6359 {
6360 return GL_FALSE;
6361 }
6362
6363 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
6364
6365 return GL_TRUE;
6366 }
6367
6368 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
6369 GLbitfield OutputsWritten)
6370 {
6371 unsigned int unBit;
6372 GLuint export_count = 0;
6373
6374 if(pR700AsmCode->depth_export_register_number >= 0)
6375 {
6376 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
6377 {
6378 return GL_FALSE;
6379 }
6380 }
6381
6382 unBit = 1 << FRAG_RESULT_COLOR;
6383 if(OutputsWritten & unBit)
6384 {
6385 if( GL_FALSE == Process_Export(pR700AsmCode,
6386 SQ_EXPORT_PIXEL,
6387 0,
6388 1,
6389 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
6390 GL_FALSE) )
6391 {
6392 return GL_FALSE;
6393 }
6394 export_count++;
6395 }
6396 unBit = 1 << FRAG_RESULT_DEPTH;
6397 if(OutputsWritten & unBit)
6398 {
6399 if( GL_FALSE == Process_Export(pR700AsmCode,
6400 SQ_EXPORT_PIXEL,
6401 0,
6402 1,
6403 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
6404 GL_TRUE))
6405 {
6406 return GL_FALSE;
6407 }
6408 export_count++;
6409 }
6410 /* Need to export something, otherwise we'll hang
6411 * results are undefined anyway */
6412 if(export_count == 0)
6413 {
6414 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
6415 }
6416
6417 if(pR700AsmCode->cf_last_export_ptr != NULL)
6418 {
6419 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6420 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6421 }
6422
6423 return GL_TRUE;
6424 }
6425
6426 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6427 GLbitfield OutputsWritten)
6428 {
6429 unsigned int unBit;
6430 unsigned int i;
6431
6432 GLuint export_starting_index = 0;
6433 GLuint export_count = pR700AsmCode->number_of_exports;
6434
6435 unBit = 1 << VERT_RESULT_HPOS;
6436 if(OutputsWritten & unBit)
6437 {
6438 if( GL_FALSE == Process_Export(pR700AsmCode,
6439 SQ_EXPORT_POS,
6440 export_starting_index,
6441 1,
6442 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6443 GL_FALSE) )
6444 {
6445 return GL_FALSE;
6446 }
6447
6448 export_count--;
6449
6450 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6451 }
6452
6453 pR700AsmCode->number_of_exports = export_count;
6454
6455 unBit = 1 << VERT_RESULT_COL0;
6456 if(OutputsWritten & unBit)
6457 {
6458 if( GL_FALSE == Process_Export(pR700AsmCode,
6459 SQ_EXPORT_PARAM,
6460 export_starting_index,
6461 1,
6462 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6463 GL_FALSE) )
6464 {
6465 return GL_FALSE;
6466 }
6467
6468 export_starting_index++;
6469 }
6470
6471 unBit = 1 << VERT_RESULT_COL1;
6472 if(OutputsWritten & unBit)
6473 {
6474 if( GL_FALSE == Process_Export(pR700AsmCode,
6475 SQ_EXPORT_PARAM,
6476 export_starting_index,
6477 1,
6478 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6479 GL_FALSE) )
6480 {
6481 return GL_FALSE;
6482 }
6483
6484 export_starting_index++;
6485 }
6486
6487 unBit = 1 << VERT_RESULT_FOGC;
6488 if(OutputsWritten & unBit)
6489 {
6490 if( GL_FALSE == Process_Export(pR700AsmCode,
6491 SQ_EXPORT_PARAM,
6492 export_starting_index,
6493 1,
6494 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6495 GL_FALSE) )
6496 {
6497 return GL_FALSE;
6498 }
6499
6500 export_starting_index++;
6501 }
6502
6503 for(i=0; i<8; i++)
6504 {
6505 unBit = 1 << (VERT_RESULT_TEX0 + i);
6506 if(OutputsWritten & unBit)
6507 {
6508 if( GL_FALSE == Process_Export(pR700AsmCode,
6509 SQ_EXPORT_PARAM,
6510 export_starting_index,
6511 1,
6512 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6513 GL_FALSE) )
6514 {
6515 return GL_FALSE;
6516 }
6517
6518 export_starting_index++;
6519 }
6520 }
6521
6522 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6523 {
6524 unBit = 1 << i;
6525 if(OutputsWritten & unBit)
6526 {
6527 if( GL_FALSE == Process_Export(pR700AsmCode,
6528 SQ_EXPORT_PARAM,
6529 export_starting_index,
6530 1,
6531 pR700AsmCode->ucVP_OutputMap[i],
6532 GL_FALSE) )
6533 {
6534 return GL_FALSE;
6535 }
6536
6537 export_starting_index++;
6538 }
6539 }
6540
6541 // At least one param should be exported
6542 if (export_count)
6543 {
6544 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6545 }
6546 else
6547 {
6548 if( GL_FALSE == Process_Export(pR700AsmCode,
6549 SQ_EXPORT_PARAM,
6550 0,
6551 1,
6552 pR700AsmCode->starting_export_register_number,
6553 GL_FALSE) )
6554 {
6555 return GL_FALSE;
6556 }
6557
6558 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6559 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6560 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6561 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6562 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6563 }
6564
6565 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6566
6567 return GL_TRUE;
6568 }
6569
6570 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6571 {
6572 FREE(pR700AsmCode->pucOutMask);
6573 FREE(pR700AsmCode->pInstDeps);
6574
6575 if(NULL != pR700AsmCode->subs)
6576 {
6577 FREE(pR700AsmCode->subs);
6578 }
6579 if(NULL != pR700AsmCode->callers)
6580 {
6581 FREE(pR700AsmCode->callers);
6582 }
6583
6584 if(NULL != pR700AsmCode->presubs)
6585 {
6586 FREE(pR700AsmCode->presubs);
6587 }
6588
6589 return GL_TRUE;
6590 }
6591