Merge branch 'mesa_7_7_branch'
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35 #include "shader/prog_parameter.h"
36
37 #include "radeon_debug.h"
38 #include "r600_context.h"
39
40 #include "r700_assembler.h"
41
42 #define USE_CF_FOR_CONTINUE_BREAK 1
43 #define USE_CF_FOR_POP_AFTER 1
44
45 struct prog_instruction noise1_insts[12] = {
46 {OPCODE_BGNSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
47 {OPCODE_MOV , {{0, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 2, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
48 {OPCODE_MOV , {{8, 0, 0, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 4, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
49 {OPCODE_MOV , {{8, 0, 585, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 8, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
50 {OPCODE_SGT , {{0, 0, 585, 0, 0, 0}, {8, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 1, 1, 0, 8, 1672, 0}, 1, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
51 {OPCODE_IF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 7, 0, 0}, 0, 0, 0, 1, 0, 0, 0, 15, 0, 0, 0},
52 {OPCODE_MOV , {{0, 0, 1755, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
53 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
54 {OPCODE_ENDIF , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
55 {OPCODE_MOV , {{0, 0, 1170, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {0, 0, 1, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
56 {OPCODE_RET , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0},
57 {OPCODE_ENDSUB , {{13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}, {13, 0, 1672, 0, 0, 0}}, {13, 0, 15, 0, 8, 1672, 0}, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0}
58 };
/*
 * Constant vectors used by the noise1 subroutine.  Only the first row carries
 * data; the second row is zero (it was implicitly zero-initialized before).
 */
float noise1_const[2][4] = {
    {0.3f, 0.9f, 0.5f, 0.3f},
    {0.0f, 0.0f, 0.0f, 0.0f}
};
62
63 COMPILED_SUB noise1_presub = {
64 &(noise1_insts[0]),
65 12,
66 2,
67 1,
68 0,
69 &(noise1_const[0]),
70 SWIZZLE_X,
71 SWIZZLE_X,
72 SWIZZLE_X,
73 SWIZZLE_X,
74 {0,0,0},
75 0
76 };
77
78 BITS addrmode_PVSDST(PVSDST * pPVSDST)
79 {
80 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
81 }
82
83 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
84 {
85 pPVSDST->addrmode0 = addrmode & 1;
86 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
87 }
88
89 void nomask_PVSDST(PVSDST * pPVSDST)
90 {
91 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
92 }
93
94 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
95 {
96 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
97 }
98
99 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
100 {
101 pPVSSRC->addrmode0 = addrmode & 1;
102 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
103 }
104
105
106 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
107 {
108 pPVSSRC->swizzlex =
109 pPVSSRC->swizzley =
110 pPVSSRC->swizzlez =
111 pPVSSRC->swizzlew = swz;
112 }
113
114 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
115 {
116 pPVSSRC->swizzlex = SQ_SEL_X;
117 pPVSSRC->swizzley = SQ_SEL_Y;
118 pPVSSRC->swizzlez = SQ_SEL_Z;
119 pPVSSRC->swizzlew = SQ_SEL_W;
120 }
121
122 void
123 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
124 {
125 switch (x)
126 {
127 case SQ_SEL_X: x = pPVSSRC->swizzlex;
128 break;
129 case SQ_SEL_Y: x = pPVSSRC->swizzley;
130 break;
131 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
132 break;
133 case SQ_SEL_W: x = pPVSSRC->swizzlew;
134 break;
135 default:;
136 }
137
138 switch (y)
139 {
140 case SQ_SEL_X: y = pPVSSRC->swizzlex;
141 break;
142 case SQ_SEL_Y: y = pPVSSRC->swizzley;
143 break;
144 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
145 break;
146 case SQ_SEL_W: y = pPVSSRC->swizzlew;
147 break;
148 default:;
149 }
150
151 switch (z)
152 {
153 case SQ_SEL_X: z = pPVSSRC->swizzlex;
154 break;
155 case SQ_SEL_Y: z = pPVSSRC->swizzley;
156 break;
157 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
158 break;
159 case SQ_SEL_W: z = pPVSSRC->swizzlew;
160 break;
161 default:;
162 }
163
164 switch (w)
165 {
166 case SQ_SEL_X: w = pPVSSRC->swizzlex;
167 break;
168 case SQ_SEL_Y: w = pPVSSRC->swizzley;
169 break;
170 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
171 break;
172 case SQ_SEL_W: w = pPVSSRC->swizzlew;
173 break;
174 default:;
175 }
176
177 pPVSSRC->swizzlex = x;
178 pPVSSRC->swizzley = y;
179 pPVSSRC->swizzlez = z;
180 pPVSSRC->swizzlew = w;
181 }
182
183 void neg_PVSSRC(PVSSRC* pPVSSRC)
184 {
185 pPVSSRC->negx = 1;
186 pPVSSRC->negy = 1;
187 pPVSSRC->negz = 1;
188 pPVSSRC->negw = 1;
189 }
190
191 void noneg_PVSSRC(PVSSRC* pPVSSRC)
192 {
193 pPVSSRC->negx = 0;
194 pPVSSRC->negy = 0;
195 pPVSSRC->negz = 0;
196 pPVSSRC->negw = 0;
197 }
198
199 // negate argument (for SUB instead of ADD and alike)
200 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
201 {
202 pPVSSRC->negx = !pPVSSRC->negx;
203 pPVSSRC->negy = !pPVSSRC->negy;
204 pPVSSRC->negz = !pPVSSRC->negz;
205 pPVSSRC->negw = !pPVSSRC->negw;
206 }
207
208 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
209 {
210 switch (c)
211 {
212 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
213 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
214 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
215 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
216 default:;
217 }
218 }
219
220 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
221 {
222 switch (c)
223 {
224 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
225 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
226 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
227 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
228 default:;
229 }
230 }
231
232 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
233 {
234 return (pOutVTXFmt0->point_size |
235 pOutVTXFmt0->edge_flag |
236 pOutVTXFmt0->rta_index |
237 pOutVTXFmt0->kill_flag |
238 pOutVTXFmt0->viewport_index);
239 }
240
241 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
242 {
243 return (pFPOutFmt->depth |
244 pFPOutFmt->stencil_ref |
245 pFPOutFmt->mask |
246 pFPOutFmt->coverage_to_mask);
247 }
248
249 GLboolean is_reduction_opcode(PVSDWORD* dest)
250 {
251 if (dest->dst.op3 == 0)
252 {
253 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
254 {
255 return GL_TRUE;
256 }
257 }
258 return GL_FALSE;
259 }
260
261 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
262 {
263 GLuint format = FMT_INVALID;
264 GLuint uiElemSize = 0;
265
266 switch (eType)
267 {
268 case GL_BYTE:
269 case GL_UNSIGNED_BYTE:
270 uiElemSize = 1;
271 switch(nChannels)
272 {
273 case 1:
274 format = FMT_8; break;
275 case 2:
276 format = FMT_8_8; break;
277 case 3:
278 format = FMT_8_8_8; break;
279 case 4:
280 format = FMT_8_8_8_8; break;
281 default:
282 break;
283 }
284 break;
285
286 case GL_UNSIGNED_SHORT:
287 case GL_SHORT:
288 uiElemSize = 2;
289 switch(nChannels)
290 {
291 case 1:
292 format = FMT_16; break;
293 case 2:
294 format = FMT_16_16; break;
295 case 3:
296 format = FMT_16_16_16; break;
297 case 4:
298 format = FMT_16_16_16_16; break;
299 default:
300 break;
301 }
302 break;
303
304 case GL_UNSIGNED_INT:
305 case GL_INT:
306 uiElemSize = 4;
307 switch(nChannels)
308 {
309 case 1:
310 format = FMT_32; break;
311 case 2:
312 format = FMT_32_32; break;
313 case 3:
314 format = FMT_32_32_32; break;
315 case 4:
316 format = FMT_32_32_32_32; break;
317 default:
318 break;
319 }
320 break;
321
322 case GL_FLOAT:
323 uiElemSize = 4;
324 switch(nChannels)
325 {
326 case 1:
327 format = FMT_32_FLOAT; break;
328 case 2:
329 format = FMT_32_32_FLOAT; break;
330 case 3:
331 format = FMT_32_32_32_FLOAT; break;
332 case 4:
333 format = FMT_32_32_32_32_FLOAT; break;
334 default:
335 break;
336 }
337 break;
338 case GL_DOUBLE:
339 uiElemSize = 8;
340 switch(nChannels)
341 {
342 case 1:
343 format = FMT_32_FLOAT; break;
344 case 2:
345 format = FMT_32_32_FLOAT; break;
346 case 3:
347 format = FMT_32_32_32_FLOAT; break;
348 case 4:
349 format = FMT_32_32_32_32_FLOAT; break;
350 default:
351 break;
352 }
353 break;
354 default:
355 ;
356 //GL_ASSERT_NO_CASE();
357 }
358
359 if(NULL != pClient_size)
360 {
361 *pClient_size = uiElemSize * nChannels;
362 }
363
364 return(format);
365 }
366
367 unsigned int r700GetNumOperands(GLuint opcode, GLuint nIsOp3)
368 {
369 if(nIsOp3 > 0)
370 {
371 return 3;
372 }
373
374 switch (opcode)
375 {
376 case SQ_OP2_INST_ADD:
377 case SQ_OP2_INST_KILLE:
378 case SQ_OP2_INST_KILLGT:
379 case SQ_OP2_INST_KILLGE:
380 case SQ_OP2_INST_KILLNE:
381 case SQ_OP2_INST_MUL:
382 case SQ_OP2_INST_MAX:
383 case SQ_OP2_INST_MIN:
384 //case SQ_OP2_INST_MAX_DX10:
385 //case SQ_OP2_INST_MIN_DX10:
386 case SQ_OP2_INST_SETE:
387 case SQ_OP2_INST_SETNE:
388 case SQ_OP2_INST_SETGT:
389 case SQ_OP2_INST_SETGE:
390 case SQ_OP2_INST_PRED_SETE:
391 case SQ_OP2_INST_PRED_SETGT:
392 case SQ_OP2_INST_PRED_SETGE:
393 case SQ_OP2_INST_PRED_SETNE:
394 case SQ_OP2_INST_DOT4:
395 case SQ_OP2_INST_DOT4_IEEE:
396 case SQ_OP2_INST_CUBE:
397 return 2;
398
399 case SQ_OP2_INST_MOV:
400 case SQ_OP2_INST_MOVA_FLOOR:
401 case SQ_OP2_INST_FRACT:
402 case SQ_OP2_INST_FLOOR:
403 case SQ_OP2_INST_TRUNC:
404 case SQ_OP2_INST_EXP_IEEE:
405 case SQ_OP2_INST_LOG_CLAMPED:
406 case SQ_OP2_INST_LOG_IEEE:
407 case SQ_OP2_INST_RECIP_IEEE:
408 case SQ_OP2_INST_RECIPSQRT_IEEE:
409 case SQ_OP2_INST_FLT_TO_INT:
410 case SQ_OP2_INST_SIN:
411 case SQ_OP2_INST_COS:
412 return 1;
413
414 default: radeon_error(
415 "Need instruction operand number for %x.\n", opcode);
416 };
417
418 return 3;
419 }
420
421 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
422 {
423 GLuint i;
424
425 Init_R700_Shader(pShader);
426 pAsm->pR700Shader = pShader;
427 pAsm->currentShaderType = spt;
428
429 pAsm->cf_last_export_ptr = NULL;
430
431 pAsm->cf_current_export_clause_ptr = NULL;
432 pAsm->cf_current_alu_clause_ptr = NULL;
433 pAsm->cf_current_tex_clause_ptr = NULL;
434 pAsm->cf_current_vtx_clause_ptr = NULL;
435 pAsm->cf_current_cf_clause_ptr = NULL;
436
437 // No clause has been created yet
438 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
439
440 pAsm->number_of_colorandz_exports = 0;
441 pAsm->number_of_exports = 0;
442 pAsm->number_of_export_opcodes = 0;
443
444 pAsm->alu_x_opcode = 0;
445
446 pAsm->D2.bits = 0;
447
448 pAsm->D.bits = 0;
449 pAsm->S[0].bits = 0;
450 pAsm->S[1].bits = 0;
451 pAsm->S[2].bits = 0;
452
453 pAsm->uLastPosUpdate = 0;
454
455 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
456
457 pAsm->uIIns = 0;
458 pAsm->uOIns = 0;
459 pAsm->number_used_registers = 0;
460 pAsm->uUsedConsts = 256;
461
462
463 // Fragment programs
464 pAsm->uBoolConsts = 0;
465 pAsm->uIntConsts = 0;
466 pAsm->uInsts = 0;
467 pAsm->uConsts = 0;
468
469 pAsm->FCSP = 0;
470 pAsm->fc_stack[0].type = FC_NONE;
471
472 pAsm->aArgSubst[0] =
473 pAsm->aArgSubst[1] =
474 pAsm->aArgSubst[2] =
475 pAsm->aArgSubst[3] = (-1);
476
477 pAsm->uOutputs = 0;
478
479 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
480 {
481 pAsm->color_export_register_number[i] = (-1);
482 }
483
484
485 pAsm->depth_export_register_number = (-1);
486 pAsm->stencil_export_register_number = (-1);
487 pAsm->coverage_to_mask_export_register_number = (-1);
488 pAsm->mask_export_register_number = (-1);
489
490 pAsm->starting_export_register_number = 0;
491 pAsm->starting_vfetch_register_number = 0;
492 pAsm->starting_temp_register_number = 0;
493 pAsm->uFirstHelpReg = 0;
494
495 pAsm->input_position_is_used = GL_FALSE;
496 pAsm->input_normal_is_used = GL_FALSE;
497
498 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
499 {
500 pAsm->input_color_is_used[ i ] = GL_FALSE;
501 }
502
503 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
504 {
505 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
506 }
507
508 for (i=0; i<VERT_ATTRIB_MAX; i++)
509 {
510 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
511 }
512
513 pAsm->number_of_inputs = 0;
514
515 pAsm->is_tex = GL_FALSE;
516 pAsm->need_tex_barrier = GL_FALSE;
517
518 pAsm->subs = NULL;
519 pAsm->unSubArraySize = 0;
520 pAsm->unSubArrayPointer = 0;
521 pAsm->callers = NULL;
522 pAsm->unCallerArraySize = 0;
523 pAsm->unCallerArrayPointer = 0;
524
525 pAsm->CALLSP = 0;
526 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
527 pAsm->CALLSTACK[0].plstCFInstructions_local
528 = &(pAsm->pR700Shader->lstCFInstructions);
529
530 pAsm->CALLSTACK[0].max = 0;
531 pAsm->CALLSTACK[0].current = 0;
532
533 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
534
535 pAsm->unCFflags = 0;
536
537 pAsm->presubs = NULL;
538 pAsm->unPresubArraySize = 0;
539 pAsm->unNumPresub = 0;
540 pAsm->unCurNumILInsts = 0;
541
542 pAsm->unVetTexBits = 0;
543
544 return 0;
545 }
546
547 GLboolean IsTex(gl_inst_opcode Opcode)
548 {
549 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) ||
550 (OPCODE_DDX==Opcode) || (OPCODE_DDY==Opcode) )
551 {
552 return GL_TRUE;
553 }
554 return GL_FALSE;
555 }
556
557 GLboolean IsAlu(gl_inst_opcode Opcode)
558 {
559 //TODO : more for fc and ex for higher spec.
560 if( IsTex(Opcode) )
561 {
562 return GL_FALSE;
563 }
564 return GL_TRUE;
565 }
566
567 int check_current_clause(r700_AssemblerBase* pAsm,
568 CF_CLAUSE_TYPE new_clause_type)
569 {
570 if (pAsm->cf_current_clause_type != new_clause_type)
571 { //Close last open clause
572 switch (pAsm->cf_current_clause_type)
573 {
574 case CF_ALU_CLAUSE:
575 if ( pAsm->cf_current_alu_clause_ptr != NULL)
576 {
577 pAsm->cf_current_alu_clause_ptr = NULL;
578 }
579 break;
580 case CF_VTX_CLAUSE:
581 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
582 {
583 pAsm->cf_current_vtx_clause_ptr = NULL;
584 }
585 break;
586 case CF_TEX_CLAUSE:
587 if ( pAsm->cf_current_tex_clause_ptr != NULL)
588 {
589 pAsm->cf_current_tex_clause_ptr = NULL;
590 }
591 break;
592 case CF_EXPORT_CLAUSE:
593 if ( pAsm->cf_current_export_clause_ptr != NULL)
594 {
595 pAsm->cf_current_export_clause_ptr = NULL;
596 }
597 break;
598 case CF_OTHER_CLAUSE:
599 if ( pAsm->cf_current_cf_clause_ptr != NULL)
600 {
601 pAsm->cf_current_cf_clause_ptr = NULL;
602 }
603 break;
604 case CF_EMPTY_CLAUSE:
605 break;
606 default:
607 radeon_error(
608 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
609 return GL_FALSE;
610 }
611
612 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
613
614 // Create new clause
615 switch (new_clause_type)
616 {
617 case CF_ALU_CLAUSE:
618 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
619 break;
620 case CF_VTX_CLAUSE:
621 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
622 break;
623 case CF_TEX_CLAUSE:
624 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
625 break;
626 case CF_EXPORT_CLAUSE:
627 {
628 R700ControlFlowSXClause* pR700ControlFlowSXClause
629 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
630
631 // Add new export instruction to control flow program
632 if (pR700ControlFlowSXClause != 0)
633 {
634 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
635 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
636 AddCFInstruction( pAsm->pR700Shader,
637 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
638 }
639 else
640 {
641 radeon_error(
642 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
643 return GL_FALSE;
644 }
645 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
646 }
647 break;
648 case CF_EMPTY_CLAUSE:
649 break;
650 case CF_OTHER_CLAUSE:
651 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
652 break;
653 default:
654 radeon_error(
655 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
656 return GL_FALSE;
657 }
658 }
659
660 return GL_TRUE;
661 }
662
663 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
664 {
665 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
666 {
667 return GL_FALSE;
668 }
669
670 pAsm->cf_current_cf_clause_ptr =
671 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
672
673 if (pAsm->cf_current_cf_clause_ptr != NULL)
674 {
675 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
676 AddCFInstruction( pAsm->pR700Shader,
677 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
678 }
679 else
680 {
681 radeon_error("Could not allocate a new VFetch CF instruction.\n");
682 return GL_FALSE;
683 }
684
685 return GL_TRUE;
686 }
687
688 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
689 R700VertexInstruction* vertex_instruction_ptr)
690 {
691 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
692 {
693 return GL_FALSE;
694 }
695
696 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
697 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
698 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
699 ) )
700 {
701 // Create new Vfetch control flow instruction for this new clause
702 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
703
704 if (pAsm->cf_current_vtx_clause_ptr != NULL)
705 {
706 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
707 AddCFInstruction( pAsm->pR700Shader,
708 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
709 }
710 else
711 {
712 radeon_error("Could not allocate a new VFetch CF instruction.\n");
713 return GL_FALSE;
714 }
715
716 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
717 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
718 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
719 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
720 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
721 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
722 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
723 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
724 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
725
726 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
727 }
728 else
729 {
730 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
731 }
732
733 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
734
735 return GL_TRUE;
736 }
737
738 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
739 R700TextureInstruction* tex_instruction_ptr)
740 {
741 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
742 {
743 return GL_FALSE;
744 }
745
746 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
747 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
748 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
749 ) )
750 {
751 // new tex cf instruction for this new clause
752 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
753
754 if (pAsm->cf_current_tex_clause_ptr != NULL)
755 {
756 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
757 AddCFInstruction( pAsm->pR700Shader,
758 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
759 }
760 else
761 {
762 radeon_error("Could not allocate a new TEX CF instruction.\n");
763 return GL_FALSE;
764 }
765
766 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
767 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
768 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
769
770 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
771 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
772 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
773 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
774 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
775 }
776 else
777 {
778 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
779 }
780
781 // If this clause constains any TEX instruction that is dependent on a previous instruction,
782 // set the barrier bit
783 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
784 {
785 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
786 }
787
788 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
789 {
790 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
791 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
792 }
793
794 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
795
796 return GL_TRUE;
797 }
798
799 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
800 GLuint gl_client_id,
801 GLuint destination_register,
802 GLuint number_of_elements,
803 GLenum dataElementType,
804 VTX_FETCH_METHOD* pFetchMethod)
805 {
806 GLuint client_size_inbyte;
807 GLuint data_format;
808 GLuint mega_fetch_count;
809 GLuint is_mega_fetch_flag;
810
811 R700VertexGenericFetch* vfetch_instruction_ptr;
812 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
813
814 if (assembled_vfetch_instruction_ptr == NULL)
815 {
816 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
817 if (vfetch_instruction_ptr == NULL)
818 {
819 return GL_FALSE;
820 }
821 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
822 }
823 else
824 {
825 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
826 }
827
828 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
829
830 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
831 {
832 //TODO : mini fetch
833 }
834 else
835 {
836 mega_fetch_count = MEGA_FETCH_BYTES - 1;
837 is_mega_fetch_flag = 0x1;
838 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
839 }
840
841 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
842 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
843 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
844
845 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
846 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
847 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
848 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
849 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
850
851 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
852 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
853 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
854 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
855
856 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
857
858 // Destination register
859 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
860 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
861
862 vfetch_instruction_ptr->m_Word2.f.offset = 0;
863 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
864
865 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
866
867 if (assembled_vfetch_instruction_ptr == NULL)
868 {
869 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
870 {
871 return GL_FALSE;
872 }
873
874 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
875 {
876 return GL_FALSE;
877 }
878 else
879 {
880 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
881 }
882 }
883
884 return GL_TRUE;
885 }
886
887 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
888 GLuint destination_register,
889 GLenum type,
890 GLint size,
891 GLubyte element,
892 GLuint _signed,
893 GLboolean normalize,
894 GLenum format,
895 VTX_FETCH_METHOD * pFetchMethod)
896 {
897 GLuint client_size_inbyte;
898 GLuint data_format;
899 GLuint mega_fetch_count;
900 GLuint is_mega_fetch_flag;
901
902 R700VertexGenericFetch* vfetch_instruction_ptr;
903 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
904 = pAsm->vfetch_instruction_ptr_array[element];
905
906 if (assembled_vfetch_instruction_ptr == NULL)
907 {
908 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
909 if (vfetch_instruction_ptr == NULL)
910 {
911 return GL_FALSE;
912 }
913 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
914 }
915 else
916 {
917 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
918 }
919
920 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
921
922 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
923 {
924 //TODO : mini fetch
925 }
926 else
927 {
928 mega_fetch_count = MEGA_FETCH_BYTES - 1;
929 is_mega_fetch_flag = 0x1;
930 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
931 }
932
933 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
934 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
935 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
936
937 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
938 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
939 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
940 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
941 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
942
943 if(format == GL_BGRA)
944 {
945 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_Z;
946 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
947 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_X;
948 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
949 }
950 else
951 {
952 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
953 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
954 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
955 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
956
957 }
958
959 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
960 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
961 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
962
963 if(1 == _signed)
964 {
965 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
966 }
967 else
968 {
969 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
970 }
971
972 if(GL_TRUE == normalize)
973 {
974 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
975 }
976 else
977 {
978 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
979 }
980
981 // Destination register
982 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
983 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
984
985 vfetch_instruction_ptr->m_Word2.f.offset = 0;
986 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
987
988 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
989
990 if (assembled_vfetch_instruction_ptr == NULL)
991 {
992 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
993 {
994 return GL_FALSE;
995 }
996
997 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
998 {
999 return GL_FALSE;
1000 }
1001 else
1002 {
1003 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
1004 }
1005 }
1006
1007 return GL_TRUE;
1008 }
1009
1010 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
1011 {
1012 GLint i;
1013 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
1014 pAsm->cf_current_vtx_clause_ptr = NULL;
1015
1016 for (i=0; i<VERT_ATTRIB_MAX; i++)
1017 {
1018 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
1019 }
1020
1021 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
1022
1023 return GL_TRUE;
1024 }
1025
1026 GLuint gethelpr(r700_AssemblerBase* pAsm)
1027 {
1028 GLuint r = pAsm->uHelpReg;
1029 pAsm->uHelpReg++;
1030 if (pAsm->uHelpReg > pAsm->number_used_registers)
1031 {
1032 pAsm->number_used_registers = pAsm->uHelpReg;
1033 }
1034 return r;
1035 }
1036 void resethelpr(r700_AssemblerBase* pAsm)
1037 {
1038 pAsm->uHelpReg = pAsm->uFirstHelpReg;
1039 }
1040
1041 void checkop_init(r700_AssemblerBase* pAsm)
1042 {
1043 resethelpr(pAsm);
1044 pAsm->aArgSubst[0] =
1045 pAsm->aArgSubst[1] =
1046 pAsm->aArgSubst[2] =
1047 pAsm->aArgSubst[3] = -1;
1048 }
1049
1050 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
1051 {
1052 GLuint tmp = gethelpr(pAsm);
1053
1054 //mov src to temp helper gpr.
1055 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1056
1057 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1058
1059 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1060 pAsm->D.dst.reg = tmp;
1061
1062 nomask_PVSDST(&(pAsm->D.dst));
1063
1064 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1065 {
1066 return GL_FALSE;
1067 }
1068
1069 noswizzle_PVSSRC(&(pAsm->S[0].src));
1070 noneg_PVSSRC(&(pAsm->S[0].src));
1071
1072 if( GL_FALSE == next_ins(pAsm) )
1073 {
1074 return GL_FALSE;
1075 }
1076
1077 pAsm->aArgSubst[1 + src] = tmp;
1078
1079 return GL_TRUE;
1080 }
1081
1082 GLboolean checkop1(r700_AssemblerBase* pAsm)
1083 {
1084 checkop_init(pAsm);
1085 return GL_TRUE;
1086 }
1087
1088 GLboolean checkop2(r700_AssemblerBase* pAsm)
1089 {
1090 GLboolean bSrcConst[2];
1091 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1092
1093 checkop_init(pAsm);
1094
1095 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1096 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1097 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1098 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1099 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1100 {
1101 bSrcConst[0] = GL_TRUE;
1102 }
1103 else
1104 {
1105 bSrcConst[0] = GL_FALSE;
1106 }
1107 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1108 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1109 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1110 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1111 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1112 {
1113 bSrcConst[1] = GL_TRUE;
1114 }
1115 else
1116 {
1117 bSrcConst[1] = GL_FALSE;
1118 }
1119
1120 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1121 {
1122 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1123 {
1124 if( GL_FALSE == mov_temp(pAsm, 1) )
1125 {
1126 return GL_FALSE;
1127 }
1128 }
1129 }
1130
1131 return GL_TRUE;
1132 }
1133
1134 GLboolean checkop3(r700_AssemblerBase* pAsm)
1135 {
1136 GLboolean bSrcConst[3];
1137 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1138
1139 checkop_init(pAsm);
1140
1141 if( (pILInst->SrcReg[0].File == PROGRAM_UNIFORM) ||
1142 (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1143 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1144 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1145 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1146 {
1147 bSrcConst[0] = GL_TRUE;
1148 }
1149 else
1150 {
1151 bSrcConst[0] = GL_FALSE;
1152 }
1153 if( (pILInst->SrcReg[1].File == PROGRAM_UNIFORM) ||
1154 (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1155 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1156 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1157 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1158 {
1159 bSrcConst[1] = GL_TRUE;
1160 }
1161 else
1162 {
1163 bSrcConst[1] = GL_FALSE;
1164 }
1165 if( (pILInst->SrcReg[2].File == PROGRAM_UNIFORM) ||
1166 (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1167 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1168 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1169 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1170 {
1171 bSrcConst[2] = GL_TRUE;
1172 }
1173 else
1174 {
1175 bSrcConst[2] = GL_FALSE;
1176 }
1177
1178 if( (GL_TRUE == bSrcConst[0]) &&
1179 (GL_TRUE == bSrcConst[1]) &&
1180 (GL_TRUE == bSrcConst[2]) )
1181 {
1182 if( GL_FALSE == mov_temp(pAsm, 1) )
1183 {
1184 return GL_FALSE;
1185 }
1186 if( GL_FALSE == mov_temp(pAsm, 2) )
1187 {
1188 return GL_FALSE;
1189 }
1190
1191 return GL_TRUE;
1192 }
1193 else if( (GL_TRUE == bSrcConst[0]) &&
1194 (GL_TRUE == bSrcConst[1]) )
1195 {
1196 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1197 {
1198 if( GL_FALSE == mov_temp(pAsm, 1) )
1199 {
1200 return 1;
1201 }
1202 }
1203
1204 return GL_TRUE;
1205 }
1206 else if ( (GL_TRUE == bSrcConst[0]) &&
1207 (GL_TRUE == bSrcConst[2]) )
1208 {
1209 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1210 {
1211 if( GL_FALSE == mov_temp(pAsm, 2) )
1212 {
1213 return GL_FALSE;
1214 }
1215 }
1216
1217 return GL_TRUE;
1218 }
1219 else if( (GL_TRUE == bSrcConst[1]) &&
1220 (GL_TRUE == bSrcConst[2]) )
1221 {
1222 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1223 {
1224 if( GL_FALSE == mov_temp(pAsm, 2) )
1225 {
1226 return GL_FALSE;
1227 }
1228 }
1229
1230 return GL_TRUE;
1231 }
1232
1233 return GL_TRUE;
1234 }
1235
1236 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1237 int src,
1238 int fld)
1239 {
1240 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1241
1242 if (fld == -1)
1243 {
1244 fld = src;
1245 }
1246
1247 if(pAsm->aArgSubst[1+src] >= 0)
1248 {
1249 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1250 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1251 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1252 }
1253 else
1254 {
1255 switch (pILInst->SrcReg[src].File)
1256 {
1257 case PROGRAM_TEMPORARY:
1258 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1259 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1260 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1261 break;
1262 case PROGRAM_CONSTANT:
1263 case PROGRAM_LOCAL_PARAM:
1264 case PROGRAM_ENV_PARAM:
1265 case PROGRAM_STATE_VAR:
1266 case PROGRAM_UNIFORM:
1267 if (1 == pILInst->SrcReg[src].RelAddr)
1268 {
1269 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1270 }
1271 else
1272 {
1273 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1274 }
1275
1276 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1277 if(pILInst->SrcReg[src].Index < 0)
1278 {
1279 WARN_ONCE("Negative register offsets not supported yet!\n");
1280 pAsm->S[fld].src.reg = 0;
1281 }
1282 else
1283 {
1284 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1285 }
1286 break;
1287 case PROGRAM_INPUT:
1288 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1289 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1290 switch (pAsm->currentShaderType)
1291 {
1292 case SPT_FP:
1293 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1294 break;
1295 case SPT_VP:
1296 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1297 break;
1298 }
1299 break;
1300 default:
1301 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1302 return GL_FALSE;
1303 }
1304 }
1305
1306 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1307 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1308 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1309 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1310
1311 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1312 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1313 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1314 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1315
1316 return GL_TRUE;
1317 }
1318
1319 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1320 {
1321 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1322 switch (pILInst->DstReg.File)
1323 {
1324 case PROGRAM_TEMPORARY:
1325 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1326 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1327 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1328 break;
1329 case PROGRAM_ADDRESS:
1330 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1331 pAsm->D.dst.rtype = DST_REG_A0;
1332 pAsm->D.dst.reg = 0;
1333 break;
1334 case PROGRAM_OUTPUT:
1335 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1336 pAsm->D.dst.rtype = DST_REG_OUT;
1337 switch (pAsm->currentShaderType)
1338 {
1339 case SPT_FP:
1340 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1341 break;
1342 case SPT_VP:
1343 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1344 break;
1345 }
1346 break;
1347 default:
1348 radeon_error("Invalid destination output argument type\n");
1349 return GL_FALSE;
1350 }
1351
1352 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1353 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1354 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1355 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1356
1357 if(pILInst->SaturateMode == SATURATE_ZERO_ONE)
1358 {
1359 pAsm->D2.dst2.SaturateMode = 1;
1360 }
1361 else
1362 {
1363 pAsm->D2.dst2.SaturateMode = 0;
1364 }
1365
1366 return GL_TRUE;
1367 }
1368
1369 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1370 {
1371 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1372
1373 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1374 {
1375 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1376 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1377
1378 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1379 }
1380 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1381 {
1382 pAsm->D.dst.rtype = DST_REG_OUT;
1383 switch (pAsm->currentShaderType)
1384 {
1385 case SPT_FP:
1386 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1387 break;
1388 case SPT_VP:
1389 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1390 break;
1391 }
1392
1393 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1394 }
1395 else
1396 {
1397 radeon_error("Invalid destination output argument type\n");
1398 return GL_FALSE;
1399 }
1400
1401 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1402 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1403 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1404 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1405
1406 return GL_TRUE;
1407 }
1408
1409 GLboolean tex_src(r700_AssemblerBase *pAsm)
1410 {
1411 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1412
1413 GLboolean bValidTexCoord = GL_FALSE;
1414
1415 if(pAsm->aArgSubst[1] >= 0)
1416 {
1417 bValidTexCoord = GL_TRUE;
1418 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1419 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1420 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1421 }
1422 else
1423 {
1424 switch (pILInst->SrcReg[0].File) {
1425 case PROGRAM_UNIFORM:
1426 case PROGRAM_CONSTANT:
1427 case PROGRAM_LOCAL_PARAM:
1428 case PROGRAM_ENV_PARAM:
1429 case PROGRAM_STATE_VAR:
1430 break;
1431 case PROGRAM_TEMPORARY:
1432 bValidTexCoord = GL_TRUE;
1433 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1434 pAsm->starting_temp_register_number;
1435 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1436 break;
1437 case PROGRAM_INPUT:
1438 if(SPT_VP == pAsm->currentShaderType)
1439 {
1440 switch (pILInst->SrcReg[0].Index)
1441 {
1442 case VERT_ATTRIB_TEX0:
1443 case VERT_ATTRIB_TEX1:
1444 case VERT_ATTRIB_TEX2:
1445 case VERT_ATTRIB_TEX3:
1446 case VERT_ATTRIB_TEX4:
1447 case VERT_ATTRIB_TEX5:
1448 case VERT_ATTRIB_TEX6:
1449 case VERT_ATTRIB_TEX7:
1450 bValidTexCoord = GL_TRUE;
1451 pAsm->S[0].src.reg =
1452 pAsm->ucVP_AttributeMap[pILInst->SrcReg[0].Index];
1453 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1454 break;
1455 }
1456 }
1457 else
1458 {
1459 switch (pILInst->SrcReg[0].Index)
1460 {
1461 case FRAG_ATTRIB_WPOS:
1462 case FRAG_ATTRIB_COL0:
1463 case FRAG_ATTRIB_COL1:
1464 case FRAG_ATTRIB_FOGC:
1465 case FRAG_ATTRIB_TEX0:
1466 case FRAG_ATTRIB_TEX1:
1467 case FRAG_ATTRIB_TEX2:
1468 case FRAG_ATTRIB_TEX3:
1469 case FRAG_ATTRIB_TEX4:
1470 case FRAG_ATTRIB_TEX5:
1471 case FRAG_ATTRIB_TEX6:
1472 case FRAG_ATTRIB_TEX7:
1473 bValidTexCoord = GL_TRUE;
1474 pAsm->S[0].src.reg =
1475 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1476 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1477 break;
1478 case FRAG_ATTRIB_FACE:
1479 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1480 break;
1481 case FRAG_ATTRIB_PNTC:
1482 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1483 break;
1484 }
1485
1486 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1487 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1488 {
1489 bValidTexCoord = GL_TRUE;
1490 pAsm->S[0].src.reg =
1491 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1492 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1493 }
1494 }
1495
1496 break;
1497 }
1498 }
1499
1500 if(GL_TRUE == bValidTexCoord)
1501 {
1502 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1503 }
1504 else
1505 {
1506 radeon_error("Invalid source texcoord for TEX instruction\n");
1507 return GL_FALSE;
1508 }
1509
1510 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1511 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1512 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1513 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1514
1515 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1516 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1517 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1518 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1519
1520 return GL_TRUE;
1521 }
1522
1523 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1524 {
1525 PVSSRC * texture_coordinate_source;
1526 PVSSRC * texture_unit_source;
1527
1528 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1529 if (tex_instruction_ptr == NULL)
1530 {
1531 return GL_FALSE;
1532 }
1533 Init_R700TextureInstruction(tex_instruction_ptr);
1534
1535 texture_coordinate_source = &(pAsm->S[0].src);
1536 texture_unit_source = &(pAsm->S[1].src);
1537
1538 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1539 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1540 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1541 tex_instruction_ptr->m_Word0.f.alt_const = 0;
1542
1543 if(SPT_VP == pAsm->currentShaderType)
1544 {
1545 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg + VERT_ATTRIB_MAX;
1546 pAsm->unVetTexBits |= 1 << texture_unit_source->reg;
1547 }
1548 else
1549 {
1550 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1551 }
1552
1553 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1554 if (normalized) {
1555 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1556 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1557 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1558 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1559 } else {
1560 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1561 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1562 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1563 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1564 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1565 }
1566
1567 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1568 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1569 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1570 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1571
1572 // dst
1573 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1574 (pAsm->D.dst.rtype == DST_REG_OUT) )
1575 {
1576 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1577 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1578
1579 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1580 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1581
1582 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1583 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1584 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1585 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1586
1587
1588 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1589 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1590 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1591 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1592 }
1593 else
1594 {
1595 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1596 return GL_FALSE;
1597 }
1598
1599 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1600 {
1601 return GL_FALSE;
1602 }
1603
1604 return GL_TRUE;
1605 }
1606
1607 void initialize(r700_AssemblerBase *pAsm)
1608 {
1609 GLuint cycle, component;
1610
1611 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1612 {
1613 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1614 {
1615 pAsm->hw_gpr[cycle][component] = (-1);
1616 }
1617 }
1618 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1619 {
1620 pAsm->hw_cfile_addr[component] = (-1);
1621 pAsm->hw_cfile_chan[component] = (-1);
1622 }
1623 }
1624
1625 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1626 int source_index,
1627 PVSSRC* pSource,
1628 BITS scalar_channel_index)
1629 {
1630 BITS src_sel;
1631 BITS src_rel;
1632 BITS src_chan;
1633 BITS src_neg;
1634
1635 //--------------------------------------------------------------------------
1636 // Source for operands src0, src1.
1637 // Values [0,127] correspond to GPR[0..127].
1638 // Values [256,511] correspond to cfile constants c[0..255].
1639
1640 //--------------------------------------------------------------------------
1641 // Other special values are shown in the list below.
1642
1643 // 248 SQ_ALU_SRC_0: special constant 0.0.
1644 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1645
1646 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1647 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1648
1649 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1650 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1651
1652 // 254 SQ_ALU_SRC_PV: previous vector result.
1653 // 255 SQ_ALU_SRC_PS: previous scalar result.
1654 //--------------------------------------------------------------------------
1655
1656 BITS channel_swizzle;
1657 switch (scalar_channel_index)
1658 {
1659 case 0: channel_swizzle = pSource->swizzlex; break;
1660 case 1: channel_swizzle = pSource->swizzley; break;
1661 case 2: channel_swizzle = pSource->swizzlez; break;
1662 case 3: channel_swizzle = pSource->swizzlew; break;
1663 default: channel_swizzle = SQ_SEL_MASK; break;
1664 }
1665
1666 if(channel_swizzle == SQ_SEL_0)
1667 {
1668 src_sel = SQ_ALU_SRC_0;
1669 }
1670 else if (channel_swizzle == SQ_SEL_1)
1671 {
1672 src_sel = SQ_ALU_SRC_1;
1673 }
1674 else
1675 {
1676 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1677 (pSource->rtype == SRC_REG_INPUT)
1678 )
1679 {
1680 src_sel = pSource->reg;
1681 }
1682 else if (pSource->rtype == SRC_REG_CONSTANT)
1683 {
1684 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1685 }
1686 else if (pSource->rtype == SRC_REC_LITERAL)
1687 {
1688 src_sel = SQ_ALU_SRC_LITERAL;
1689 }
1690 else
1691 {
1692 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1693 source_index, pSource->rtype);
1694 return GL_FALSE;
1695 }
1696 }
1697
1698 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1699 {
1700 src_rel = SQ_ABSOLUTE;
1701 }
1702 else
1703 {
1704 src_rel = SQ_RELATIVE;
1705 }
1706
1707 switch (channel_swizzle)
1708 {
1709 case SQ_SEL_X:
1710 src_chan = SQ_CHAN_X;
1711 break;
1712 case SQ_SEL_Y:
1713 src_chan = SQ_CHAN_Y;
1714 break;
1715 case SQ_SEL_Z:
1716 src_chan = SQ_CHAN_Z;
1717 break;
1718 case SQ_SEL_W:
1719 src_chan = SQ_CHAN_W;
1720 break;
1721 case SQ_SEL_0:
1722 case SQ_SEL_1:
1723 // Does not matter since src_sel controls
1724 src_chan = SQ_CHAN_X;
1725 break;
1726 default:
1727 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1728 return GL_FALSE;
1729 break;
1730 }
1731
1732 switch (scalar_channel_index)
1733 {
1734 case 0: src_neg = pSource->negx; break;
1735 case 1: src_neg = pSource->negy; break;
1736 case 2: src_neg = pSource->negz; break;
1737 case 3: src_neg = pSource->negw; break;
1738 default: src_neg = 0; break;
1739 }
1740
1741 switch (source_index)
1742 {
1743 case 0:
1744 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1745 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1746 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1747 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1748 break;
1749 case 1:
1750 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1751 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1752 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1753 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1754 break;
1755 case 2:
1756 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1757 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1758 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1759 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1760 break;
1761 default:
1762 radeon_error("Only three sources allowed in ALU opcodes.\n");
1763 return GL_FALSE;
1764 break;
1765 }
1766
1767 return GL_TRUE;
1768 }
1769
1770 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1771 R700ALUInstruction* alu_instruction_ptr,
1772 GLuint contiguous_slots_needed)
1773 {
1774 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1775 {
1776 return GL_FALSE;
1777 }
1778
1779 if ( pAsm->alu_x_opcode != 0 ||
1780 pAsm->cf_current_alu_clause_ptr == NULL ||
1781 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1782 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1783 ) )
1784 {
1785
1786 //new cf inst for this clause
1787 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1788
1789 // link the new cf to cf segment
1790 if(NULL != pAsm->cf_current_alu_clause_ptr)
1791 {
1792 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1793 AddCFInstruction( pAsm->pR700Shader,
1794 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1795 }
1796 else
1797 {
1798 radeon_error("Could not allocate a new ALU CF instruction.\n");
1799 return GL_FALSE;
1800 }
1801
1802 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1803 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1804 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1805
1806 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1807 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1808 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1809
1810 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1811
1812 if(pAsm->alu_x_opcode != 0)
1813 {
1814 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1815 pAsm->alu_x_opcode = 0;
1816 }
1817 else
1818 {
1819 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1820 }
1821
1822 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1823
1824 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1825 }
1826 else
1827 {
1828 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count += (GetInstructionSize(alu_instruction_ptr->m_ShaderInstType) / 2);
1829 }
1830
1831 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1832 // set the whole_quad_mode for this clause
1833 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1834 {
1835 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1836 }
1837
1838 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1839 {
1840 alu_instruction_ptr->m_Word0.f.last = 1;
1841 }
1842
1843 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1844 {
1845 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1846 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1847 }
1848
1849 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1850
1851 return GL_TRUE;
1852 }
1853
1854 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1855 int source_index,
1856 BITS* psrc_sel,
1857 BITS* psrc_rel,
1858 BITS* psrc_chan,
1859 BITS* psrc_neg)
1860 {
1861 switch (source_index)
1862 {
1863 case 0:
1864 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1865 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1866 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1867 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1868 break;
1869
1870 case 1:
1871 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1872 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1873 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1874 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1875 break;
1876
1877 case 2:
1878 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1879 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1880 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1881 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1882 break;
1883 }
1884 }
1885
1886 int is_cfile(BITS sel)
1887 {
1888 if (sel > 255 && sel < 512)
1889 {
1890 return 1;
1891 }
1892 return 0;
1893 }
1894
1895 int is_const(BITS sel)
1896 {
1897 if (is_cfile(sel))
1898 {
1899 return 1;
1900 }
1901 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1902 {
1903 return 1;
1904 }
1905 return 0;
1906 }
1907
1908 int is_gpr(BITS sel)
1909 {
1910 if (sel >= 0 && sel < 128)
1911 {
1912 return 1;
1913 }
1914 return 0;
1915 }
1916
1917 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1918 SQ_ALU_VEC_120, //001
1919 SQ_ALU_VEC_102, //010
1920
1921 SQ_ALU_VEC_201, //011
1922 SQ_ALU_VEC_012, //100
1923 SQ_ALU_VEC_021, //101
1924
1925 SQ_ALU_VEC_012, //110
1926 SQ_ALU_VEC_012}; //111
1927
1928 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1929 SQ_ALU_SCL_122, //001
1930 SQ_ALU_SCL_122, //010
1931
1932 SQ_ALU_SCL_221, //011
1933 SQ_ALU_SCL_212, //100
1934 SQ_ALU_SCL_122, //101
1935
1936 SQ_ALU_SCL_122, //110
1937 SQ_ALU_SCL_122}; //111
1938
1939 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1940 GLuint sel,
1941 GLuint chan)
1942 {
1943 int res_match = (-1);
1944 int res_empty = (-1);
1945
1946 GLint res;
1947
1948 for (res=3; res>=0; res--)
1949 {
1950 if(pAsm->hw_cfile_addr[ res] < 0)
1951 {
1952 res_empty = res;
1953 }
1954 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1955 &&
1956 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1957 {
1958 res_match = res;
1959 }
1960 }
1961
1962 if(res_match >= 0)
1963 {
1964 // Read for this scalar component already reserved, nothing to do here.
1965 ;
1966 }
1967 else if(res_empty >= 0)
1968 {
1969 pAsm->hw_cfile_addr[ res_empty ] = sel;
1970 pAsm->hw_cfile_chan[ res_empty ] = chan;
1971 }
1972 else
1973 {
1974 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1975 return GL_FALSE;
1976 }
1977 return GL_TRUE;
1978 }
1979
1980 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1981 {
1982 if(pAsm->hw_gpr[cycle][chan] < 0)
1983 {
1984 pAsm->hw_gpr[cycle][chan] = sel;
1985 }
1986 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1987 {
1988 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1989 return GL_FALSE;
1990 }
1991
1992 return GL_TRUE;
1993 }
1994
1995 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1996 {
1997 switch (swiz)
1998 {
1999 case SQ_ALU_SCL_210:
2000 {
2001 int table[3] = {2, 1, 0};
2002 *pCycle = table[sel];
2003 return GL_TRUE;
2004 }
2005 break;
2006 case SQ_ALU_SCL_122:
2007 {
2008 int table[3] = {1, 2, 2};
2009 *pCycle = table[sel];
2010 return GL_TRUE;
2011 }
2012 break;
2013 case SQ_ALU_SCL_212:
2014 {
2015 int table[3] = {2, 1, 2};
2016 *pCycle = table[sel];
2017 return GL_TRUE;
2018 }
2019 break;
2020 case SQ_ALU_SCL_221:
2021 {
2022 int table[3] = {2, 2, 1};
2023 *pCycle = table[sel];
2024 return GL_TRUE;
2025 }
2026 break;
2027 default:
2028 radeon_error("Bad Scalar bank swizzle value\n");
2029 break;
2030 }
2031
2032 return GL_FALSE;
2033 }
2034
2035 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
2036 {
2037 switch (swiz)
2038 {
2039 case SQ_ALU_VEC_012:
2040 {
2041 int table[3] = {0, 1, 2};
2042 *pCycle = table[sel];
2043 }
2044 break;
2045 case SQ_ALU_VEC_021:
2046 {
2047 int table[3] = {0, 2, 1};
2048 *pCycle = table[sel];
2049 }
2050 break;
2051 case SQ_ALU_VEC_120:
2052 {
2053 int table[3] = {1, 2, 0};
2054 *pCycle = table[sel];
2055 }
2056 break;
2057 case SQ_ALU_VEC_102:
2058 {
2059 int table[3] = {1, 0, 2};
2060 *pCycle = table[sel];
2061 }
2062 break;
2063 case SQ_ALU_VEC_201:
2064 {
2065 int table[3] = {2, 0, 1};
2066 *pCycle = table[sel];
2067 }
2068 break;
2069 case SQ_ALU_VEC_210:
2070 {
2071 int table[3] = {2, 1, 0};
2072 *pCycle = table[sel];
2073 }
2074 break;
2075 default:
2076 radeon_error("Bad Vec bank swizzle value\n");
2077 return GL_FALSE;
2078 break;
2079 }
2080
2081 return GL_TRUE;
2082 }
2083
2084 GLboolean check_scalar(r700_AssemblerBase* pAsm,
2085 R700ALUInstruction* alu_instruction_ptr)
2086 {
2087 GLuint cycle;
2088 GLuint bank_swizzle;
2089 GLuint const_count = 0;
2090
2091 BITS sel;
2092 BITS chan;
2093 BITS rel;
2094 BITS neg;
2095
2096 GLuint src;
2097
2098 BITS src_sel [3] = {0,0,0};
2099 BITS src_chan[3] = {0,0,0};
2100 BITS src_rel [3] = {0,0,0};
2101 BITS src_neg [3] = {0,0,0};
2102
2103 GLuint swizzle_key;
2104
2105 GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2106
2107 for (src=0; src<number_of_operands; src++)
2108 {
2109 get_src_properties(alu_instruction_ptr,
2110 src,
2111 &(src_sel[src]),
2112 &(src_rel[src]),
2113 &(src_chan[src]),
2114 &(src_neg[src]) );
2115 }
2116
2117
2118 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2119 (is_const( src_sel[1] ) ? 2 : 0) +
2120 (is_const( src_sel[2] ) ? 1 : 0) );
2121
2122 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2123
2124 for (src=0; src<number_of_operands; src++)
2125 {
2126 sel = src_sel [src];
2127 chan = src_chan[src];
2128 rel = src_rel [src];
2129 neg = src_neg [src];
2130
2131 if (is_const( sel ))
2132 {
2133 // Any constant, including literal and inline constants
2134 const_count++;
2135
2136 if (is_cfile( sel ))
2137 {
2138 reserve_cfile(pAsm, sel, chan);
2139 }
2140
2141 }
2142 }
2143
2144 for (src=0; src<number_of_operands; src++)
2145 {
2146 sel = src_sel [src];
2147 chan = src_chan[src];
2148 rel = src_rel [src];
2149 neg = src_neg [src];
2150
2151 if( is_gpr(sel) )
2152 {
2153 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2154
2155 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2156 {
2157 return GL_FALSE;
2158 }
2159
2160 if(cycle < const_count)
2161 {
2162 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2163 {
2164 return GL_FALSE;
2165 }
2166 }
2167 }
2168 }
2169
2170 return GL_TRUE;
2171 }
2172
2173 GLboolean check_vector(r700_AssemblerBase* pAsm,
2174 R700ALUInstruction* alu_instruction_ptr)
2175 {
2176 GLuint cycle;
2177 GLuint bank_swizzle;
2178 GLuint const_count = 0;
2179
2180 GLuint src;
2181
2182 BITS sel;
2183 BITS chan;
2184 BITS rel;
2185 BITS neg;
2186
2187 BITS src_sel [3] = {0,0,0};
2188 BITS src_chan[3] = {0,0,0};
2189 BITS src_rel [3] = {0,0,0};
2190 BITS src_neg [3] = {0,0,0};
2191
2192 GLuint swizzle_key;
2193
2194 GLuint number_of_operands = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2195
2196 for (src=0; src<number_of_operands; src++)
2197 {
2198 get_src_properties(alu_instruction_ptr,
2199 src,
2200 &(src_sel[src]),
2201 &(src_rel[src]),
2202 &(src_chan[src]),
2203 &(src_neg[src]) );
2204 }
2205
2206
2207 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2208 (is_const( src_sel[1] ) ? 2 : 0) +
2209 (is_const( src_sel[2] ) ? 1 : 0)
2210 );
2211
2212 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2213
2214 for (src=0; src<number_of_operands; src++)
2215 {
2216 sel = src_sel [src];
2217 chan = src_chan[src];
2218 rel = src_rel [src];
2219 neg = src_neg [src];
2220
2221
2222 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2223
2224 if( is_gpr(sel) )
2225 {
2226 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2227 {
2228 return GL_FALSE;
2229 }
2230
2231 if ( (src == 1) &&
2232 (sel == src_sel[0]) &&
2233 (chan == src_chan[0]) )
2234 {
2235 }
2236 else
2237 {
2238 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2239 {
2240 return GL_FALSE;
2241 }
2242 }
2243 }
2244 else if( is_const(sel) )
2245 {
2246 const_count++;
2247
2248 if( is_cfile(sel) )
2249 {
2250 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2251 {
2252 return GL_FALSE;
2253 }
2254 }
2255 }
2256 }
2257
2258 return GL_TRUE;
2259 }
2260
2261 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2262 {
2263 R700ALUInstruction * alu_instruction_ptr;
2264 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2265 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2266
2267 GLuint number_of_scalar_operations;
2268 GLboolean is_single_scalar_operation;
2269 GLuint scalar_channel_index;
2270
2271 PVSSRC * pcurrent_source;
2272 int current_source_index;
2273 GLuint contiguous_slots_needed;
2274
2275 GLuint uNumSrc = r700GetNumOperands(pAsm->D.dst.opcode, pAsm->D.dst.op3);
2276 //GLuint channel_swizzle, j;
2277 //GLuint chan_counter[4] = {0, 0, 0, 0};
2278 //PVSSRC * pSource[3];
2279 GLboolean bSplitInst = GL_FALSE;
2280
2281 if (1 == pAsm->D.dst.math)
2282 {
2283 is_single_scalar_operation = GL_TRUE;
2284 number_of_scalar_operations = 1;
2285 }
2286 else
2287 {
2288 is_single_scalar_operation = GL_FALSE;
2289 number_of_scalar_operations = 4;
2290
2291 /* current assembler doesn't do more than 1 register per source */
2292 #if 0
2293 /* check read port, only very preliminary algorithm, not count in
2294 src0/1 same comp case and prev slot repeat case; also not count relative
2295 addressing. TODO: improve performance. */
2296 for(j=0; j<uNumSrc; j++)
2297 {
2298 pSource[j] = &(pAsm->S[j].src);
2299 }
2300 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2301 {
2302 for(j=0; j<uNumSrc; j++)
2303 {
2304 switch (scalar_channel_index)
2305 {
2306 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2307 case 1: channel_swizzle = pSource[j]->swizzley; break;
2308 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2309 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2310 default: channel_swizzle = SQ_SEL_MASK; break;
2311 }
2312 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2313 (pSource[j]->rtype == SRC_REG_INPUT))
2314 && (channel_swizzle <= SQ_SEL_W) )
2315 {
2316 chan_counter[channel_swizzle]++;
2317 }
2318 }
2319 }
2320 if( (chan_counter[SQ_SEL_X] > 3)
2321 || (chan_counter[SQ_SEL_Y] > 3)
2322 || (chan_counter[SQ_SEL_Z] > 3)
2323 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2324 {
2325 bSplitInst = GL_TRUE;
2326 }
2327 #endif
2328 }
2329
2330 contiguous_slots_needed = 0;
2331
2332 if(!is_single_scalar_operation)
2333 {
2334 contiguous_slots_needed = 4;
2335 }
2336
2337 contiguous_slots_needed += pAsm->D2.dst2.literal_slots;
2338
2339 initialize(pAsm);
2340
2341 for (scalar_channel_index=0;
2342 scalar_channel_index < number_of_scalar_operations;
2343 scalar_channel_index++)
2344 {
2345 if(scalar_channel_index == (number_of_scalar_operations-1))
2346 {
2347 switch(pAsm->D2.dst2.literal_slots)
2348 {
2349 case 0:
2350 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2351 Init_R700ALUInstruction(alu_instruction_ptr);
2352 break;
2353 case 1:
2354 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2355 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pAsm->C[0].f, pAsm->C[1].f);
2356 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2357 break;
2358 case 2:
2359 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2360 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl,pAsm->C[0].f, pAsm->C[1].f, pAsm->C[2].f, pAsm->C[3].f);
2361 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2362 break;
2363 };
2364 }
2365 else
2366 {
2367 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2368 Init_R700ALUInstruction(alu_instruction_ptr);
2369 }
2370
2371 //src 0
2372 current_source_index = 0;
2373 pcurrent_source = &(pAsm->S[0].src);
2374
2375 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2376 current_source_index,
2377 pcurrent_source,
2378 scalar_channel_index) )
2379 {
2380 return GL_FALSE;
2381 }
2382
2383 if (uNumSrc > 1)
2384 {
2385 // Process source 1
2386 current_source_index = 1;
2387 pcurrent_source = &(pAsm->S[current_source_index].src);
2388
2389 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2390 current_source_index,
2391 pcurrent_source,
2392 scalar_channel_index) )
2393 {
2394 return GL_FALSE;
2395 }
2396 }
2397
2398 //other bits
2399 alu_instruction_ptr->m_Word0.f.index_mode = pAsm->D2.dst2.index_mode;
2400
2401 if( (is_single_scalar_operation == GL_TRUE)
2402 || (GL_TRUE == bSplitInst) )
2403 {
2404 alu_instruction_ptr->m_Word0.f.last = 1;
2405 }
2406 else
2407 {
2408 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2409 }
2410
2411 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2412 if(1 == pAsm->D.dst.predicated)
2413 {
2414 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2415 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2416 }
2417 else
2418 {
2419 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2420 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2421 }
2422
2423 // dst
2424 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2425 (pAsm->D.dst.rtype == DST_REG_OUT) )
2426 {
2427 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2428 }
2429 else
2430 {
2431 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2432 return GL_FALSE;
2433 }
2434
2435 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2436
2437 if ( is_single_scalar_operation == GL_TRUE )
2438 {
2439 // Override scalar_channel_index since only one scalar value will be written
2440 if(pAsm->D.dst.writex)
2441 {
2442 scalar_channel_index = 0;
2443 }
2444 else if(pAsm->D.dst.writey)
2445 {
2446 scalar_channel_index = 1;
2447 }
2448 else if(pAsm->D.dst.writez)
2449 {
2450 scalar_channel_index = 2;
2451 }
2452 else if(pAsm->D.dst.writew)
2453 {
2454 scalar_channel_index = 3;
2455 }
2456 }
2457
2458 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2459
2460 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2461
2462 if (pAsm->D.dst.op3)
2463 {
2464 //op3
2465
2466 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2467
2468 //There's 3rd src for op3
2469 current_source_index = 2;
2470 pcurrent_source = &(pAsm->S[current_source_index].src);
2471
2472 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2473 current_source_index,
2474 pcurrent_source,
2475 scalar_channel_index) )
2476 {
2477 return GL_FALSE;
2478 }
2479 }
2480 else
2481 {
2482 //op2
2483 if (pAsm->bR6xx)
2484 {
2485 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2486
2487 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = pAsm->S[0].src.abs;
2488 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = pAsm->S[1].src.abs;
2489
2490 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2491 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2492 switch (scalar_channel_index)
2493 {
2494 case 0:
2495 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2496 break;
2497 case 1:
2498 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2499 break;
2500 case 2:
2501 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2502 break;
2503 case 3:
2504 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2505 break;
2506 default:
2507 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2508 break;
2509 }
2510 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2511 }
2512 else
2513 {
2514 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2515
2516 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = pAsm->S[0].src.abs;
2517 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = pAsm->S[1].src.abs;
2518
2519 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2520 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2521 switch (scalar_channel_index)
2522 {
2523 case 0:
2524 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2525 break;
2526 case 1:
2527 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2528 break;
2529 case 2:
2530 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2531 break;
2532 case 3:
2533 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2534 break;
2535 default:
2536 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2537 break;
2538 }
2539 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2540 }
2541 }
2542
2543 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2544 {
2545 return GL_FALSE;
2546 }
2547
2548 /*
2549 * Judge the type of current instruction, is it vector or scalar
2550 * instruction.
2551 */
2552 if (is_single_scalar_operation)
2553 {
2554 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2555 {
2556 return GL_FALSE;
2557 }
2558 }
2559 else
2560 {
2561 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2562 {
2563 return GL_FALSE;
2564 }
2565 }
2566
2567 contiguous_slots_needed -= 1;
2568 }
2569
2570 return GL_TRUE;
2571 }
2572
2573 GLboolean next_ins(r700_AssemblerBase *pAsm)
2574 {
2575 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2576
2577 if( GL_TRUE == pAsm->is_tex )
2578 {
2579 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2580 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2581 {
2582 radeon_error("Error assembling TEX instruction\n");
2583 return GL_FALSE;
2584 }
2585 } else {
2586 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2587 {
2588 radeon_error("Error assembling TEX instruction\n");
2589 return GL_FALSE;
2590 }
2591 }
2592 }
2593 else
2594 { //ALU
2595 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2596 {
2597 radeon_error("Error assembling ALU instruction\n");
2598 return GL_FALSE;
2599 }
2600 }
2601
2602 if(pAsm->D.dst.rtype == DST_REG_OUT)
2603 {
2604 if(pAsm->D.dst.op3)
2605 {
2606 // There is no mask for OP3 instructions, so all channels are written
2607 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2608 }
2609 else
2610 {
2611 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2612 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2613 }
2614 }
2615
2616 //reset for next inst.
2617 pAsm->D.bits = 0;
2618 pAsm->D2.bits = 0;
2619 pAsm->S[0].bits = 0;
2620 pAsm->S[1].bits = 0;
2621 pAsm->S[2].bits = 0;
2622 pAsm->is_tex = GL_FALSE;
2623 pAsm->need_tex_barrier = GL_FALSE;
2624 pAsm->D2.bits = 0;
2625 pAsm->C[0].bits = pAsm->C[1].bits = pAsm->C[2].bits = pAsm->C[3].bits = 0;
2626 return GL_TRUE;
2627 }
2628
2629 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2630 {
2631 BITS tmp;
2632
2633 checkop1(pAsm);
2634
2635 tmp = gethelpr(pAsm);
2636
2637 // opcode tmp.x, a.x
2638 // MOV dst, tmp.x
2639
2640 pAsm->D.dst.opcode = opcode;
2641 pAsm->D.dst.math = 1;
2642
2643 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2644 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2645 pAsm->D.dst.reg = tmp;
2646 pAsm->D.dst.writex = 1;
2647
2648 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2649 {
2650 return GL_FALSE;
2651 }
2652
2653 if ( GL_FALSE == next_ins(pAsm) )
2654 {
2655 return GL_FALSE;
2656 }
2657
2658 // Now replicate result to all necessary channels in destination
2659 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2660
2661 if( GL_FALSE == assemble_dst(pAsm) )
2662 {
2663 return GL_FALSE;
2664 }
2665
2666 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2667 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2668 pAsm->S[0].src.reg = tmp;
2669
2670 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2671 noneg_PVSSRC(&(pAsm->S[0].src));
2672
2673 if( GL_FALSE == next_ins(pAsm) )
2674 {
2675 return GL_FALSE;
2676 }
2677
2678 return GL_TRUE;
2679 }
2680
2681 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2682 {
2683 checkop1(pAsm);
2684
2685 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2686
2687 if( GL_FALSE == assemble_dst(pAsm) )
2688 {
2689 return GL_FALSE;
2690 }
2691 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2692 {
2693 return GL_FALSE;
2694 }
2695
2696 pAsm->S[1].bits = pAsm->S[0].bits;
2697 flipneg_PVSSRC(&(pAsm->S[1].src));
2698
2699 if ( GL_FALSE == next_ins(pAsm) )
2700 {
2701 return GL_FALSE;
2702 }
2703
2704 return GL_TRUE;
2705 }
2706
2707 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2708 {
2709 if( GL_FALSE == checkop2(pAsm) )
2710 {
2711 return GL_FALSE;
2712 }
2713
2714 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2715
2716 if( GL_FALSE == assemble_dst(pAsm) )
2717 {
2718 return GL_FALSE;
2719 }
2720
2721 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2722 {
2723 return GL_FALSE;
2724 }
2725
2726 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2727 {
2728 return GL_FALSE;
2729 }
2730
2731 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2732 {
2733 flipneg_PVSSRC(&(pAsm->S[1].src));
2734 }
2735
2736 if( GL_FALSE == next_ins(pAsm) )
2737 {
2738 return GL_FALSE;
2739 }
2740
2741 return GL_TRUE;
2742 }
2743
2744 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
2745 { /* TODO: ar values dont' persist between clauses */
2746 if( GL_FALSE == checkop1(pAsm) )
2747 {
2748 return GL_FALSE;
2749 }
2750
2751 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
2752 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2753 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2754 pAsm->D.dst.reg = 0;
2755 pAsm->D.dst.writex = 0;
2756 pAsm->D.dst.writey = 0;
2757 pAsm->D.dst.writez = 0;
2758 pAsm->D.dst.writew = 0;
2759
2760 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2761 {
2762 return GL_FALSE;
2763 }
2764
2765 if( GL_FALSE == next_ins(pAsm) )
2766 {
2767 return GL_FALSE;
2768 }
2769
2770 return GL_TRUE;
2771 }
2772
2773 GLboolean assemble_BAD(char *opcode_str)
2774 {
2775 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2776 return GL_FALSE;
2777 }
2778
2779 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2780 {
2781 int tmp;
2782
2783 if( GL_FALSE == checkop3(pAsm) )
2784 {
2785 return GL_FALSE;
2786 }
2787
2788 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2789 pAsm->D.dst.op3 = 1;
2790
2791 tmp = (-1);
2792
2793 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2794 {
2795 //OP3 has no support for write mask
2796 tmp = gethelpr(pAsm);
2797
2798 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2799 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2800 pAsm->D.dst.reg = tmp;
2801
2802 nomask_PVSDST(&(pAsm->D.dst));
2803 }
2804 else
2805 {
2806 if( GL_FALSE == assemble_dst(pAsm) )
2807 {
2808 return GL_FALSE;
2809 }
2810 }
2811
2812 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2813 {
2814 return GL_FALSE;
2815 }
2816
2817 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2818 {
2819 return GL_FALSE;
2820 }
2821
2822 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2823 {
2824 return GL_FALSE;
2825 }
2826
2827 if ( GL_FALSE == next_ins(pAsm) )
2828 {
2829 return GL_FALSE;
2830 }
2831
2832 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2833 {
2834 if( GL_FALSE == assemble_dst(pAsm) )
2835 {
2836 return GL_FALSE;
2837 }
2838
2839 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2840
2841 //tmp for source
2842 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2843 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2844 pAsm->S[0].src.reg = tmp;
2845
2846 noneg_PVSSRC(&(pAsm->S[0].src));
2847 noswizzle_PVSSRC(&(pAsm->S[0].src));
2848
2849 if( GL_FALSE == next_ins(pAsm) )
2850 {
2851 return GL_FALSE;
2852 }
2853 }
2854
2855 return GL_TRUE;
2856 }
2857
2858 GLboolean assemble_TRIG(r700_AssemblerBase *pAsm, BITS opcode)
2859 {
2860 int tmp;
2861 checkop1(pAsm);
2862
2863 tmp = gethelpr(pAsm);
2864
2865 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2866 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2867 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2868 pAsm->D.dst.reg = tmp;
2869 pAsm->D.dst.writex = 1;
2870
2871 assemble_src(pAsm, 0, -1);
2872
2873 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
2874 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
2875 pAsm->D2.dst2.literal_slots = 1;
2876 pAsm->C[0].f = 1/(3.1415926535 * 2);
2877 pAsm->C[1].f = 0.0F;
2878 next_ins(pAsm);
2879
2880 pAsm->D.dst.opcode = opcode;
2881 pAsm->D.dst.math = 1;
2882
2883 assemble_dst(pAsm);
2884
2885 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2886 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2887 pAsm->S[0].src.reg = tmp;
2888 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2889 noneg_PVSSRC(&(pAsm->S[0].src));
2890
2891 next_ins(pAsm);
2892
2893 //TODO - replicate if more channels set in WriteMask
2894 return GL_TRUE;
2895
2896 }
2897
2898 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2899 {
2900 if( GL_FALSE == checkop2(pAsm) )
2901 {
2902 return GL_FALSE;
2903 }
2904
2905 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2906
2907 if( GL_FALSE == assemble_dst(pAsm) )
2908 {
2909 return GL_FALSE;
2910 }
2911
2912 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2913 {
2914 return GL_FALSE;
2915 }
2916
2917 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2918 {
2919 return GL_FALSE;
2920 }
2921
2922 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2923 {
2924 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2925 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2926 }
2927 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2928 {
2929 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2930 }
2931
2932 if ( GL_FALSE == next_ins(pAsm) )
2933 {
2934 return GL_FALSE;
2935 }
2936
2937 return GL_TRUE;
2938 }
2939
2940 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2941 {
2942 if( GL_FALSE == checkop2(pAsm) )
2943 {
2944 return GL_FALSE;
2945 }
2946
2947 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2948
2949 if( GL_FALSE == assemble_dst(pAsm) )
2950 {
2951 return GL_FALSE;
2952 }
2953
2954 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2955 {
2956 return GL_FALSE;
2957 }
2958
2959 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2960 {
2961 return GL_FALSE;
2962 }
2963
2964 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2965 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2966
2967 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2968 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2969
2970 if ( GL_FALSE == next_ins(pAsm) )
2971 {
2972 return GL_FALSE;
2973 }
2974
2975 return GL_TRUE;
2976 }
2977
2978 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2979 {
2980 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2981 }
2982
2983 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
2984 {
2985 BITS tmp;
2986
2987 checkop1(pAsm);
2988
2989 tmp = gethelpr(pAsm);
2990
2991 // FLOOR tmp.x, a.x
2992 // EX2 dst.x tmp.x
2993
2994 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
2995 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2996
2997 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2998 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2999 pAsm->D.dst.reg = tmp;
3000 pAsm->D.dst.writex = 1;
3001
3002 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3003 {
3004 return GL_FALSE;
3005 }
3006
3007 if( GL_FALSE == next_ins(pAsm) )
3008 {
3009 return GL_FALSE;
3010 }
3011
3012 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3013 pAsm->D.dst.math = 1;
3014
3015 if( GL_FALSE == assemble_dst(pAsm) )
3016 {
3017 return GL_FALSE;
3018 }
3019
3020 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3021
3022 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3023 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3024 pAsm->S[0].src.reg = tmp;
3025
3026 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3027 noneg_PVSSRC(&(pAsm->S[0].src));
3028
3029 if( GL_FALSE == next_ins(pAsm) )
3030 {
3031 return GL_FALSE;
3032 }
3033 }
3034
3035 // FRACT dst.y a.x
3036
3037 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3038 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3039
3040 if( GL_FALSE == assemble_dst(pAsm) )
3041 {
3042 return GL_FALSE;
3043 }
3044
3045 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3046 {
3047 return GL_FALSE;
3048 }
3049
3050 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3051
3052 if( GL_FALSE == next_ins(pAsm) )
3053 {
3054 return GL_FALSE;
3055 }
3056 }
3057
3058 // EX2 dst.z, a.x
3059
3060 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3061 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3062 pAsm->D.dst.math = 1;
3063
3064 if( GL_FALSE == assemble_dst(pAsm) )
3065 {
3066 return GL_FALSE;
3067 }
3068
3069 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3070 {
3071 return GL_FALSE;
3072 }
3073
3074 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3075
3076 if( GL_FALSE == next_ins(pAsm) )
3077 {
3078 return GL_FALSE;
3079 }
3080 }
3081
3082 // MOV dst.w 1.0
3083
3084 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3085 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3086
3087 if( GL_FALSE == assemble_dst(pAsm) )
3088 {
3089 return GL_FALSE;
3090 }
3091
3092 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3093
3094 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3095 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3096 pAsm->S[0].src.reg = tmp;
3097
3098 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3099 noneg_PVSSRC(&(pAsm->S[0].src));
3100
3101 if( GL_FALSE == next_ins(pAsm) )
3102 {
3103 return GL_FALSE;
3104 }
3105 }
3106
3107 return GL_TRUE;
3108 }
3109
3110 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3111 {
3112 checkop1(pAsm);
3113
3114 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3115
3116 if ( GL_FALSE == assemble_dst(pAsm) )
3117 {
3118 return GL_FALSE;
3119 }
3120
3121 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3122 {
3123 return GL_FALSE;
3124 }
3125
3126 if ( GL_FALSE == next_ins(pAsm) )
3127 {
3128 return GL_FALSE;
3129 }
3130
3131 return GL_TRUE;
3132 }
3133
3134 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3135 {
3136 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3137 }
3138
3139 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3140 {
3141 checkop1(pAsm);
3142
3143 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3144
3145 if ( GL_FALSE == assemble_dst(pAsm) )
3146 {
3147 return GL_FALSE;
3148 }
3149
3150 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3151 {
3152 return GL_FALSE;
3153 }
3154
3155 if ( GL_FALSE == next_ins(pAsm) )
3156 {
3157 return GL_FALSE;
3158 }
3159
3160 return GL_TRUE;
3161 }
3162
3163 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3164 {
3165 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3166
3167 if(pILInst->Opcode == OPCODE_KIL)
3168 checkop1(pAsm);
3169
3170 pAsm->D.dst.opcode = opcode;
3171 //pAsm->D.dst.math = 1;
3172
3173 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3174 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3175 pAsm->D.dst.reg = 0;
3176 pAsm->D.dst.writex = 0;
3177 pAsm->D.dst.writey = 0;
3178 pAsm->D.dst.writez = 0;
3179 pAsm->D.dst.writew = 0;
3180
3181 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3182 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3183 pAsm->S[0].src.reg = 0;
3184 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3185 noneg_PVSSRC(&(pAsm->S[0].src));
3186
3187 if(pILInst->Opcode == OPCODE_KIL_NV)
3188 {
3189 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3190 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3191 pAsm->S[1].src.reg = 0;
3192 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_1);
3193 neg_PVSSRC(&(pAsm->S[1].src));
3194 }
3195 else
3196 {
3197 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3198 {
3199 return GL_FALSE;
3200 }
3201
3202 }
3203
3204 if ( GL_FALSE == next_ins(pAsm) )
3205 {
3206 return GL_FALSE;
3207 }
3208
3209 /* Doc says KILL has to be last(end) ALU clause */
3210 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3211 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3212
3213 return GL_TRUE;
3214 }
3215
3216 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3217 {
3218 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3219 }
3220
3221 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3222 {
3223 BITS tmp;
3224
3225 if( GL_FALSE == checkop3(pAsm) )
3226 {
3227 return GL_FALSE;
3228 }
3229
3230 tmp = gethelpr(pAsm);
3231
3232 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3233
3234 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3235 pAsm->D.dst.reg = tmp;
3236 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3237 nomask_PVSDST(&(pAsm->D.dst));
3238
3239
3240 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3241 {
3242 return GL_FALSE;
3243 }
3244
3245 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3246 {
3247 return GL_FALSE;
3248 }
3249
3250 neg_PVSSRC(&(pAsm->S[1].src));
3251
3252 if( GL_FALSE == next_ins(pAsm) )
3253 {
3254 return GL_FALSE;
3255 }
3256
3257 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3258 pAsm->D.dst.op3 = 1;
3259
3260 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3261 pAsm->D.dst.reg = tmp;
3262 nomask_PVSDST(&(pAsm->D.dst));
3263 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3264
3265 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3266 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3267 pAsm->S[0].src.reg = tmp;
3268 noswizzle_PVSSRC(&(pAsm->S[0].src));
3269
3270
3271 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3272 {
3273 return GL_FALSE;
3274 }
3275
3276 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3277 {
3278 return GL_FALSE;
3279 }
3280
3281 if( GL_FALSE == next_ins(pAsm) )
3282 {
3283 return GL_FALSE;
3284 }
3285
3286 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3287
3288 if( GL_FALSE == assemble_dst(pAsm) )
3289 {
3290 return GL_FALSE;
3291 }
3292
3293 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3294 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3295 pAsm->S[0].src.reg = tmp;
3296 noswizzle_PVSSRC(&(pAsm->S[0].src));
3297
3298 if( GL_FALSE == next_ins(pAsm) )
3299 {
3300 return GL_FALSE;
3301 }
3302
3303 return GL_TRUE;
3304 }
3305
3306 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3307 {
3308 BITS tmp1, tmp2, tmp3;
3309
3310 checkop1(pAsm);
3311
3312 tmp1 = gethelpr(pAsm);
3313 tmp2 = gethelpr(pAsm);
3314 tmp3 = gethelpr(pAsm);
3315
3316 // FIXME: The hardware can do fabs() directly on input
3317 // elements, but the compiler doesn't have the
3318 // capability to use that.
3319
3320 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3321
3322 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3323
3324 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3325 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3326 pAsm->D.dst.reg = tmp1;
3327 pAsm->D.dst.writex = 1;
3328
3329 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3330 {
3331 return GL_FALSE;
3332 }
3333
3334 pAsm->S[1].bits = pAsm->S[0].bits;
3335 flipneg_PVSSRC(&(pAsm->S[1].src));
3336
3337 if ( GL_FALSE == next_ins(pAsm) )
3338 {
3339 return GL_FALSE;
3340 }
3341
3342 // Entire algo:
3343 //
3344 // LG2 tmp2.x, tmp1.x
3345 // FLOOR tmp3.x, tmp2.x
3346 // MOV dst.x, tmp3.x
3347 // ADD tmp3.x, tmp2.x, -tmp3.x
3348 // EX2 dst.y, tmp3.x
3349 // MOV dst.z, tmp2.x
3350 // MOV dst.w, 1.0
3351
3352 // LG2 tmp2.x, tmp1.x
3353 // FLOOR tmp3.x, tmp2.x
3354
3355 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3356 pAsm->D.dst.math = 1;
3357
3358 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3359 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3360 pAsm->D.dst.reg = tmp2;
3361 pAsm->D.dst.writex = 1;
3362
3363 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3364 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3365 pAsm->S[0].src.reg = tmp1;
3366
3367 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3368 noneg_PVSSRC(&(pAsm->S[0].src));
3369
3370 if( GL_FALSE == next_ins(pAsm) )
3371 {
3372 return GL_FALSE;
3373 }
3374
3375 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3376
3377 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3378 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3379 pAsm->D.dst.reg = tmp3;
3380 pAsm->D.dst.writex = 1;
3381
3382 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3383 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3384 pAsm->S[0].src.reg = tmp2;
3385
3386 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3387 noneg_PVSSRC(&(pAsm->S[0].src));
3388
3389 if( GL_FALSE == next_ins(pAsm) )
3390 {
3391 return GL_FALSE;
3392 }
3393
3394 // MOV dst.x, tmp3.x
3395
3396 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3397
3398 if( GL_FALSE == assemble_dst(pAsm) )
3399 {
3400 return GL_FALSE;
3401 }
3402
3403 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3404
3405 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3406 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3407 pAsm->S[0].src.reg = tmp3;
3408
3409 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3410 noneg_PVSSRC(&(pAsm->S[0].src));
3411
3412 if( GL_FALSE == next_ins(pAsm) )
3413 {
3414 return GL_FALSE;
3415 }
3416
3417 // ADD tmp3.x, tmp2.x, -tmp3.x
3418 // EX2 dst.y, tmp3.x
3419
3420 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3421
3422 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3423 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3424 pAsm->D.dst.reg = tmp3;
3425 pAsm->D.dst.writex = 1;
3426
3427 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3428 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3429 pAsm->S[0].src.reg = tmp2;
3430
3431 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3432 noneg_PVSSRC(&(pAsm->S[0].src));
3433
3434 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3435 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3436 pAsm->S[1].src.reg = tmp3;
3437
3438 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3439 neg_PVSSRC(&(pAsm->S[1].src));
3440
3441 if( GL_FALSE == next_ins(pAsm) )
3442 {
3443 return GL_FALSE;
3444 }
3445
3446 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3447 pAsm->D.dst.math = 1;
3448
3449 if( GL_FALSE == assemble_dst(pAsm) )
3450 {
3451 return GL_FALSE;
3452 }
3453
3454 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3455
3456 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3457 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3458 pAsm->S[0].src.reg = tmp3;
3459
3460 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3461 noneg_PVSSRC(&(pAsm->S[0].src));
3462
3463 if( GL_FALSE == next_ins(pAsm) )
3464 {
3465 return GL_FALSE;
3466 }
3467
3468 // MOV dst.z, tmp2.x
3469
3470 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3471
3472 if( GL_FALSE == assemble_dst(pAsm) )
3473 {
3474 return GL_FALSE;
3475 }
3476
3477 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3478
3479 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3480 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3481 pAsm->S[0].src.reg = tmp2;
3482
3483 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3484 noneg_PVSSRC(&(pAsm->S[0].src));
3485
3486 if( GL_FALSE == next_ins(pAsm) )
3487 {
3488 return GL_FALSE;
3489 }
3490
3491 // MOV dst.w 1.0
3492
3493 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3494
3495 if( GL_FALSE == assemble_dst(pAsm) )
3496 {
3497 return GL_FALSE;
3498 }
3499
3500 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3501
3502 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3503 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3504 pAsm->S[0].src.reg = tmp1;
3505
3506 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3507 noneg_PVSSRC(&(pAsm->S[0].src));
3508
3509 if( GL_FALSE == next_ins(pAsm) )
3510 {
3511 return GL_FALSE;
3512 }
3513
3514 return GL_TRUE;
3515 }
3516
3517 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3518 {
3519 int tmp, ii;
3520 GLboolean bReplaceDst = GL_FALSE;
3521 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3522
3523 if( GL_FALSE == checkop3(pAsm) )
3524 {
3525 return GL_FALSE;
3526 }
3527
3528 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3529 pAsm->D.dst.op3 = 1;
3530
3531 tmp = (-1);
3532
3533 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3534 { /* TODO : more investigation on MAD src and dst using same register */
3535 for(ii=0; ii<3; ii++)
3536 {
3537 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3538 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3539 {
3540 bReplaceDst = GL_TRUE;
3541 break;
3542 }
3543 }
3544 }
3545 if(0xF != pILInst->DstReg.WriteMask)
3546 { /* OP3 has no support for write mask */
3547 bReplaceDst = GL_TRUE;
3548 }
3549
3550 if(GL_TRUE == bReplaceDst)
3551 {
3552 tmp = gethelpr(pAsm);
3553
3554 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3555 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3556 pAsm->D.dst.reg = tmp;
3557
3558 nomask_PVSDST(&(pAsm->D.dst));
3559 }
3560 else
3561 {
3562 if( GL_FALSE == assemble_dst(pAsm) )
3563 {
3564 return GL_FALSE;
3565 }
3566 }
3567
3568 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3569 {
3570 return GL_FALSE;
3571 }
3572
3573 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3574 {
3575 return GL_FALSE;
3576 }
3577
3578 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3579 {
3580 return GL_FALSE;
3581 }
3582
3583 if ( GL_FALSE == next_ins(pAsm) )
3584 {
3585 return GL_FALSE;
3586 }
3587
3588 if (GL_TRUE == bReplaceDst)
3589 {
3590 if( GL_FALSE == assemble_dst(pAsm) )
3591 {
3592 return GL_FALSE;
3593 }
3594
3595 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3596
3597 //tmp for source
3598 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3599 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3600 pAsm->S[0].src.reg = tmp;
3601
3602 noneg_PVSSRC(&(pAsm->S[0].src));
3603 noswizzle_PVSSRC(&(pAsm->S[0].src));
3604
3605 if( GL_FALSE == next_ins(pAsm) )
3606 {
3607 return GL_FALSE;
3608 }
3609 }
3610
3611 return GL_TRUE;
3612 }
3613
3614 /* LIT dst, src */
3615 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3616 {
3617 unsigned int dstReg;
3618 unsigned int dstType;
3619 unsigned int srcReg;
3620 unsigned int srcType;
3621 checkop1(pAsm);
3622 int tmp = gethelpr(pAsm);
3623
3624 if( GL_FALSE == assemble_dst(pAsm) )
3625 {
3626 return GL_FALSE;
3627 }
3628 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3629 {
3630 return GL_FALSE;
3631 }
3632 dstReg = pAsm->D.dst.reg;
3633 dstType = pAsm->D.dst.rtype;
3634 srcReg = pAsm->S[0].src.reg;
3635 srcType = pAsm->S[0].src.rtype;
3636
3637 /* dst.xw, <- 1.0 */
3638 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3639 pAsm->D.dst.rtype = dstType;
3640 pAsm->D.dst.reg = dstReg;
3641 pAsm->D.dst.writex = 1;
3642 pAsm->D.dst.writey = 0;
3643 pAsm->D.dst.writez = 0;
3644 pAsm->D.dst.writew = 1;
3645 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3646 pAsm->S[0].src.reg = tmp;
3647 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3648 noneg_PVSSRC(&(pAsm->S[0].src));
3649 pAsm->S[0].src.swizzlex = SQ_SEL_1;
3650 pAsm->S[0].src.swizzley = SQ_SEL_1;
3651 pAsm->S[0].src.swizzlez = SQ_SEL_1;
3652 pAsm->S[0].src.swizzlew = SQ_SEL_1;
3653 if( GL_FALSE == next_ins(pAsm) )
3654 {
3655 return GL_FALSE;
3656 }
3657
3658 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3659 {
3660 return GL_FALSE;
3661 }
3662
3663 /* dst.y = max(src.x, 0.0) */
3664 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3665 pAsm->D.dst.rtype = dstType;
3666 pAsm->D.dst.reg = dstReg;
3667 pAsm->D.dst.writex = 0;
3668 pAsm->D.dst.writey = 1;
3669 pAsm->D.dst.writez = 0;
3670 pAsm->D.dst.writew = 0;
3671 pAsm->S[0].src.rtype = srcType;
3672 pAsm->S[0].src.reg = srcReg;
3673 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3674 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3675 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3676 pAsm->S[1].src.reg = tmp;
3677 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3678 noneg_PVSSRC(&(pAsm->S[1].src));
3679 pAsm->S[1].src.swizzlex = SQ_SEL_0;
3680 pAsm->S[1].src.swizzley = SQ_SEL_0;
3681 pAsm->S[1].src.swizzlez = SQ_SEL_0;
3682 pAsm->S[1].src.swizzlew = SQ_SEL_0;
3683 if( GL_FALSE == next_ins(pAsm) )
3684 {
3685 return GL_FALSE;
3686 }
3687
3688 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3689 {
3690 return GL_FALSE;
3691 }
3692
3693 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
3694
3695 /* dst.z = log(src.y) */
3696 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
3697 pAsm->D.dst.math = 1;
3698 pAsm->D.dst.rtype = dstType;
3699 pAsm->D.dst.reg = dstReg;
3700 pAsm->D.dst.writex = 0;
3701 pAsm->D.dst.writey = 0;
3702 pAsm->D.dst.writez = 1;
3703 pAsm->D.dst.writew = 0;
3704 pAsm->S[0].src.rtype = srcType;
3705 pAsm->S[0].src.reg = srcReg;
3706 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3707 if( GL_FALSE == next_ins(pAsm) )
3708 {
3709 return GL_FALSE;
3710 }
3711
3712 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3713 {
3714 return GL_FALSE;
3715 }
3716
3717 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
3718 {
3719 return GL_FALSE;
3720 }
3721
3722 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3723
3724 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
3725
3726 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
3727 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
3728 pAsm->D.dst.math = 1;
3729 pAsm->D.dst.op3 = 1;
3730 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3731 pAsm->D.dst.reg = tmp;
3732 pAsm->D.dst.writex = 1;
3733 pAsm->D.dst.writey = 0;
3734 pAsm->D.dst.writez = 0;
3735 pAsm->D.dst.writew = 0;
3736
3737 pAsm->S[0].src.rtype = srcType;
3738 pAsm->S[0].src.reg = srcReg;
3739 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3740
3741 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3742 pAsm->S[1].src.reg = dstReg;
3743 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3744 noneg_PVSSRC(&(pAsm->S[1].src));
3745 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
3746 pAsm->S[1].src.swizzley = SQ_SEL_Z;
3747 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3748 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
3749
3750 pAsm->S[2].src.rtype = srcType;
3751 pAsm->S[2].src.reg = srcReg;
3752 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3753
3754 if( GL_FALSE == next_ins(pAsm) )
3755 {
3756 return GL_FALSE;
3757 }
3758
3759 /* dst.z = exp(tmp.x) */
3760 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3761 pAsm->D.dst.math = 1;
3762 pAsm->D.dst.rtype = dstType;
3763 pAsm->D.dst.reg = dstReg;
3764 pAsm->D.dst.writex = 0;
3765 pAsm->D.dst.writey = 0;
3766 pAsm->D.dst.writez = 1;
3767 pAsm->D.dst.writew = 0;
3768
3769 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3770 pAsm->S[0].src.reg = tmp;
3771 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3772 noneg_PVSSRC(&(pAsm->S[0].src));
3773 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3774 pAsm->S[0].src.swizzley = SQ_SEL_X;
3775 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3776 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3777
3778 if( GL_FALSE == next_ins(pAsm) )
3779 {
3780 return GL_FALSE;
3781 }
3782
3783 return GL_TRUE;
3784 }
3785
3786 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3787 {
3788 if( GL_FALSE == checkop2(pAsm) )
3789 {
3790 return GL_FALSE;
3791 }
3792
3793 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3794
3795 if( GL_FALSE == assemble_dst(pAsm) )
3796 {
3797 return GL_FALSE;
3798 }
3799
3800 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3801 {
3802 return GL_FALSE;
3803 }
3804
3805 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3806 {
3807 return GL_FALSE;
3808 }
3809
3810 if( GL_FALSE == next_ins(pAsm) )
3811 {
3812 return GL_FALSE;
3813 }
3814
3815 return GL_TRUE;
3816 }
3817
3818 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3819 {
3820 if( GL_FALSE == checkop2(pAsm) )
3821 {
3822 return GL_FALSE;
3823 }
3824
3825 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3826
3827 if( GL_FALSE == assemble_dst(pAsm) )
3828 {
3829 return GL_FALSE;
3830 }
3831
3832 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3833 {
3834 return GL_FALSE;
3835 }
3836
3837 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3838 {
3839 return GL_FALSE;
3840 }
3841
3842 if( GL_FALSE == next_ins(pAsm) )
3843 {
3844 return GL_FALSE;
3845 }
3846
3847 return GL_TRUE;
3848 }
3849
3850 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3851 {
3852 checkop1(pAsm);
3853
3854 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3855
3856 if (GL_FALSE == assemble_dst(pAsm))
3857 {
3858 return GL_FALSE;
3859 }
3860
3861 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3862 {
3863 return GL_FALSE;
3864 }
3865
3866 if ( GL_FALSE == next_ins(pAsm) )
3867 {
3868 return GL_FALSE;
3869 }
3870
3871 return GL_TRUE;
3872 }
3873
3874 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3875 {
3876 if( GL_FALSE == checkop2(pAsm) )
3877 {
3878 return GL_FALSE;
3879 }
3880
3881 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3882
3883 if( GL_FALSE == assemble_dst(pAsm) )
3884 {
3885 return GL_FALSE;
3886 }
3887
3888 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3889 {
3890 return GL_FALSE;
3891 }
3892
3893 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3894 {
3895 return GL_FALSE;
3896 }
3897
3898 if( GL_FALSE == next_ins(pAsm) )
3899 {
3900 return GL_FALSE;
3901 }
3902
3903 return GL_TRUE;
3904 }
3905
3906 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3907 {
3908 BITS tmp;
3909
3910 checkop1(pAsm);
3911
3912 tmp = gethelpr(pAsm);
3913
3914 // LG2 tmp.x, a.swizzle
3915 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3916 pAsm->D.dst.math = 1;
3917
3918 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3919 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3920 pAsm->D.dst.reg = tmp;
3921 nomask_PVSDST(&(pAsm->D.dst));
3922
3923 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3924 {
3925 return GL_FALSE;
3926 }
3927
3928 if( GL_FALSE == next_ins(pAsm) )
3929 {
3930 return GL_FALSE;
3931 }
3932
3933 // MUL tmp.x, tmp.x, b.swizzle
3934 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3935
3936 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3937 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3938 pAsm->D.dst.reg = tmp;
3939 nomask_PVSDST(&(pAsm->D.dst));
3940
3941 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3942 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3943 pAsm->S[0].src.reg = tmp;
3944 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3945 noneg_PVSSRC(&(pAsm->S[0].src));
3946
3947 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3948 {
3949 return GL_FALSE;
3950 }
3951
3952 if( GL_FALSE == next_ins(pAsm) )
3953 {
3954 return GL_FALSE;
3955 }
3956
3957 // EX2 dst.mask, tmp.x
3958 // EX2 tmp.x, tmp.x
3959 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3960 pAsm->D.dst.math = 1;
3961
3962 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3963 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3964 pAsm->D.dst.reg = tmp;
3965 nomask_PVSDST(&(pAsm->D.dst));
3966
3967 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3968 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3969 pAsm->S[0].src.reg = tmp;
3970 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3971 noneg_PVSSRC(&(pAsm->S[0].src));
3972
3973 if( GL_FALSE == next_ins(pAsm) )
3974 {
3975 return GL_FALSE;
3976 }
3977
3978 // Now replicate result to all necessary channels in destination
3979 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3980
3981 if( GL_FALSE == assemble_dst(pAsm) )
3982 {
3983 return GL_FALSE;
3984 }
3985
3986 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3987 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3988 pAsm->S[0].src.reg = tmp;
3989
3990 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3991 noneg_PVSSRC(&(pAsm->S[0].src));
3992
3993 if( GL_FALSE == next_ins(pAsm) )
3994 {
3995 return GL_FALSE;
3996 }
3997
3998 return GL_TRUE;
3999 }
4000
4001 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4002 {
4003 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4004 }
4005
4006 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4007 {
4008 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4009 }
4010
4011 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4012 {
4013 BITS tmp;
4014
4015 checkop1(pAsm);
4016
4017 tmp = gethelpr(pAsm);
4018 /* tmp.x = src /2*PI */
4019 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4020 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4021 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4022 pAsm->D.dst.reg = tmp;
4023 pAsm->D.dst.writex = 1;
4024
4025 assemble_src(pAsm, 0, -1);
4026
4027 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
4028 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
4029 pAsm->D2.dst2.literal_slots = 1;
4030 pAsm->C[0].f = 1/(3.1415926535 * 2);
4031 pAsm->C[1].f = 0.0F;
4032
4033 next_ins(pAsm);
4034
4035 // COS dst.x, a.x
4036 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4037 pAsm->D.dst.math = 1;
4038
4039 assemble_dst(pAsm);
4040 /* mask y */
4041 pAsm->D.dst.writey = 0;
4042
4043 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4044 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4045 pAsm->S[0].src.reg = tmp;
4046 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4047 noneg_PVSSRC(&(pAsm->S[0].src));
4048
4049 if ( GL_FALSE == next_ins(pAsm) )
4050 {
4051 return GL_FALSE;
4052 }
4053
4054 // SIN dst.y, a.x
4055 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4056 pAsm->D.dst.math = 1;
4057
4058 assemble_dst(pAsm);
4059 /* mask x */
4060 pAsm->D.dst.writex = 0;
4061
4062 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4063 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4064 pAsm->S[0].src.reg = tmp;
4065 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4066 noneg_PVSSRC(&(pAsm->S[0].src));
4067
4068 if( GL_FALSE == next_ins(pAsm) )
4069 {
4070 return GL_FALSE;
4071 }
4072
4073 return GL_TRUE;
4074 }
4075
4076 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4077 {
4078 if( GL_FALSE == checkop2(pAsm) )
4079 {
4080 return GL_FALSE;
4081 }
4082
4083 pAsm->D.dst.opcode = opcode;
4084 //pAsm->D.dst.math = 1;
4085
4086 if( GL_FALSE == assemble_dst(pAsm) )
4087 {
4088 return GL_FALSE;
4089 }
4090
4091 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4092 {
4093 return GL_FALSE;
4094 }
4095
4096 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4097 {
4098 return GL_FALSE;
4099 }
4100
4101 if( GL_FALSE == next_ins(pAsm) )
4102 {
4103 return GL_FALSE;
4104 }
4105
4106 return GL_TRUE;
4107 }
4108
4109 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4110 {
4111 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
4112
4113 pAsm->D.dst.opcode = opcode;
4114 pAsm->D.dst.math = 1;
4115 pAsm->D.dst.predicated = 1;
4116
4117 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4118 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4119 pAsm->D.dst.reg = pAsm->uHelpReg;
4120 pAsm->D.dst.writex = 1;
4121 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
4122
4123 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4124 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4125 pAsm->S[0].src.reg = pAsm->last_cond_register + pAsm->starting_temp_register_number;
4126 pAsm->S[0].src.swizzlex = pILInst->DstReg.CondSwizzle & 0x7;
4127 noneg_PVSSRC(&(pAsm->S[0].src));
4128
4129 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4130 pAsm->S[1].src.reg = pAsm->uHelpReg;
4131 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4132 noneg_PVSSRC(&(pAsm->S[1].src));
4133 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4134 pAsm->S[1].src.swizzley = SQ_SEL_0;
4135 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4136 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4137
4138 if( GL_FALSE == next_ins(pAsm) )
4139 {
4140 return GL_FALSE;
4141 }
4142
4143 return GL_TRUE;
4144 }
4145
4146 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4147 {
4148 if( GL_FALSE == checkop2(pAsm) )
4149 {
4150 return GL_FALSE;
4151 }
4152
4153 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4154
4155 if( GL_FALSE == assemble_dst(pAsm) )
4156 {
4157 return GL_FALSE;
4158 }
4159
4160 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4161 {
4162 return GL_FALSE;
4163 }
4164
4165 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4166 {
4167 return GL_FALSE;
4168 }
4169
4170 if( GL_FALSE == next_ins(pAsm) )
4171 {
4172 return GL_FALSE;
4173 }
4174
4175 return GL_TRUE;
4176 }
4177
4178 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4179 {
4180 if( GL_FALSE == checkop2(pAsm) )
4181 {
4182 return GL_FALSE;
4183 }
4184
4185 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4186
4187 if( GL_FALSE == assemble_dst(pAsm) )
4188 {
4189 return GL_FALSE;
4190 }
4191
4192 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4193 {
4194 return GL_FALSE;
4195 }
4196
4197 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4198 {
4199 return GL_FALSE;
4200 }
4201
4202 if( GL_FALSE == next_ins(pAsm) )
4203 {
4204 return GL_FALSE;
4205 }
4206
4207 return GL_TRUE;
4208 }
4209
4210 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4211 {
4212 return GL_TRUE;
4213 }
4214
4215 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4216 {
4217 GLboolean src_const;
4218 GLboolean need_barrier = GL_FALSE;
4219
4220 checkop1(pAsm);
4221
4222 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4223 {
4224 case PROGRAM_UNIFORM:
4225 case PROGRAM_CONSTANT:
4226 case PROGRAM_LOCAL_PARAM:
4227 case PROGRAM_ENV_PARAM:
4228 case PROGRAM_STATE_VAR:
4229 src_const = GL_TRUE;
4230 break;
4231 case PROGRAM_TEMPORARY:
4232 case PROGRAM_INPUT:
4233 default:
4234 src_const = GL_FALSE;
4235 break;
4236 }
4237
4238 if (GL_TRUE == src_const)
4239 {
4240 if ( GL_FALSE == mov_temp(pAsm, 0) )
4241 return GL_FALSE;
4242 need_barrier = GL_TRUE;
4243 }
4244
4245 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4246 {
4247 GLuint tmp = gethelpr(pAsm);
4248 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4249 pAsm->D.dst.math = 1;
4250 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4251 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4252 pAsm->D.dst.reg = tmp;
4253 pAsm->D.dst.writew = 1;
4254
4255 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4256 {
4257 return GL_FALSE;
4258 }
4259 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4260 if( GL_FALSE == next_ins(pAsm) )
4261 {
4262 return GL_FALSE;
4263 }
4264
4265 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4266 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4267 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4268 pAsm->D.dst.reg = tmp;
4269 pAsm->D.dst.writex = 1;
4270 pAsm->D.dst.writey = 1;
4271 pAsm->D.dst.writez = 1;
4272 pAsm->D.dst.writew = 0;
4273
4274 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4275 {
4276 return GL_FALSE;
4277 }
4278 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4279 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4280 pAsm->S[1].src.reg = tmp;
4281 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4282
4283 if( GL_FALSE == next_ins(pAsm) )
4284 {
4285 return GL_FALSE;
4286 }
4287
4288 pAsm->aArgSubst[1] = tmp;
4289 need_barrier = GL_TRUE;
4290 }
4291
4292 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4293 {
4294 GLuint tmp1 = gethelpr(pAsm);
4295 GLuint tmp2 = gethelpr(pAsm);
4296
4297 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4298 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4299 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4300 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4301 pAsm->D.dst.reg = tmp1;
4302 nomask_PVSDST(&(pAsm->D.dst));
4303
4304 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4305 {
4306 return GL_FALSE;
4307 }
4308
4309 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4310 {
4311 return GL_FALSE;
4312 }
4313
4314 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4315 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4316
4317 if( GL_FALSE == next_ins(pAsm) )
4318 {
4319 return GL_FALSE;
4320 }
4321
4322 /* tmp1.z = RCP_e(|tmp1.z|) */
4323 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4324 pAsm->D.dst.math = 1;
4325 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4326 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4327 pAsm->D.dst.reg = tmp1;
4328 pAsm->D.dst.writez = 1;
4329
4330 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4331 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4332 pAsm->S[0].src.reg = tmp1;
4333 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4334 pAsm->S[0].src.abs = 1;
4335
4336 next_ins(pAsm);
4337
4338 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4339 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4340 * muladd has no writemask, have to use another temp
4341 */
4342 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4343 pAsm->D.dst.op3 = 1;
4344 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4345 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4346 pAsm->D.dst.reg = tmp2;
4347
4348 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4349 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4350 pAsm->S[0].src.reg = tmp1;
4351 noswizzle_PVSSRC(&(pAsm->S[0].src));
4352 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4353 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4354 pAsm->S[1].src.reg = tmp1;
4355 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4356 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4357 /* immediate c 1.5 */
4358 pAsm->D2.dst2.literal_slots = 1;
4359 pAsm->C[0].f = 1.5F;
4360 pAsm->S[2].src.rtype = SRC_REC_LITERAL;
4361 pAsm->S[2].src.reg = tmp1;
4362 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X);
4363
4364 next_ins(pAsm);
4365
4366 /* tmp1.xy = temp2.xy */
4367 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4368 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4369 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4370 pAsm->D.dst.reg = tmp1;
4371 pAsm->D.dst.writex = 1;
4372 pAsm->D.dst.writey = 1;
4373 pAsm->D.dst.writez = 0;
4374 pAsm->D.dst.writew = 0;
4375
4376 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4377 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4378 pAsm->S[0].src.reg = tmp2;
4379 noswizzle_PVSSRC(&(pAsm->S[0].src));
4380
4381 next_ins(pAsm);
4382 pAsm->aArgSubst[1] = tmp1;
4383 need_barrier = GL_TRUE;
4384
4385 }
4386
4387 switch(pAsm->pILInst[pAsm->uiCurInst].Opcode)
4388 {
4389 case OPCODE_DDX:
4390 /* will these need WQM(1) on CF inst ? */
4391 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_H;
4392 break;
4393 case OPCODE_DDY:
4394 pAsm->D.dst.opcode = SQ_TEX_INST_GET_GRADIENTS_V;
4395 break;
4396 case OPCODE_TXB:
4397 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_L;
4398 break;
4399 default:
4400 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
4401 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE_C;
4402 else
4403 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4404 }
4405
4406 pAsm->is_tex = GL_TRUE;
4407 if ( GL_TRUE == need_barrier )
4408
4409 pAsm->is_tex = GL_TRUE;
4410 if ( GL_TRUE == need_barrier )
4411 {
4412 pAsm->need_tex_barrier = GL_TRUE;
4413 }
4414 // Set src1 to tex unit id
4415 pAsm->S[1].src.reg = pAsm->SamplerUnits[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
4416 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4417
4418 //No sw info from mesa compiler, so hard code here.
4419 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4420 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4421 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4422 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4423
4424 if( GL_FALSE == tex_dst(pAsm) )
4425 {
4426 return GL_FALSE;
4427 }
4428
4429 if( GL_FALSE == tex_src(pAsm) )
4430 {
4431 return GL_FALSE;
4432 }
4433
4434 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4435 {
4436 /* hopefully did swizzles before */
4437 noswizzle_PVSSRC(&(pAsm->S[0].src));
4438 }
4439
4440 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4441 {
4442 /* SAMPLE dst, tmp.yxwy, CUBE */
4443 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4444 pAsm->S[0].src.swizzley = SQ_SEL_X;
4445 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4446 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4447 }
4448
4449 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
4450 {
4451 /* compare value goes to w chan ? */
4452 pAsm->S[0].src.swizzlew = SQ_SEL_Z;
4453 }
4454
4455 if ( GL_FALSE == next_ins(pAsm) )
4456 {
4457 return GL_FALSE;
4458 }
4459
4460 /* add ARB shadow ambient but clamp to 0..1 */
4461 if(pAsm->pILInst[pAsm->uiCurInst].TexShadow == 1)
4462 {
4463 /* ADD_SAT dst, dst, ambient[texunit] */
4464 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4465
4466 if( GL_FALSE == assemble_dst(pAsm) )
4467 {
4468 return GL_FALSE;
4469 }
4470 pAsm->D2.dst2.SaturateMode = 1;
4471
4472 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4473 pAsm->S[0].src.reg = pAsm->D.dst.reg;
4474 noswizzle_PVSSRC(&(pAsm->S[0].src));
4475 noneg_PVSSRC(&(pAsm->S[0].src));
4476
4477 pAsm->S[1].src.rtype = SRC_REG_CONSTANT;
4478 pAsm->S[1].src.reg = pAsm->shadow_regs[pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit];
4479 noswizzle_PVSSRC(&(pAsm->S[1].src));
4480 noneg_PVSSRC(&(pAsm->S[1].src));
4481
4482 if( GL_FALSE == next_ins(pAsm) )
4483 {
4484 return GL_FALSE;
4485 }
4486
4487 }
4488
4489 return GL_TRUE;
4490 }
4491
4492 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4493 {
4494 BITS tmp;
4495
4496 if( GL_FALSE == checkop2(pAsm) )
4497 {
4498 return GL_FALSE;
4499 }
4500
4501 tmp = gethelpr(pAsm);
4502
4503 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4504
4505 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4506 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4507 pAsm->D.dst.reg = tmp;
4508 nomask_PVSDST(&(pAsm->D.dst));
4509
4510 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4511 {
4512 return GL_FALSE;
4513 }
4514
4515 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4516 {
4517 return GL_FALSE;
4518 }
4519
4520 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4521 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4522
4523 if( GL_FALSE == next_ins(pAsm) )
4524 {
4525 return GL_FALSE;
4526 }
4527
4528 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4529 pAsm->D.dst.op3 = 1;
4530
4531 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4532 {
4533 tmp = gethelpr(pAsm);
4534
4535 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4536 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4537 pAsm->D.dst.reg = tmp;
4538
4539 nomask_PVSDST(&(pAsm->D.dst));
4540 }
4541 else
4542 {
4543 if( GL_FALSE == assemble_dst(pAsm) )
4544 {
4545 return GL_FALSE;
4546 }
4547 }
4548
4549 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4550 {
4551 return GL_FALSE;
4552 }
4553
4554 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4555 {
4556 return GL_FALSE;
4557 }
4558
4559 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4560 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4561
4562 // result1 + (neg) result0
4563 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4564 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4565 pAsm->S[2].src.reg = tmp;
4566
4567 neg_PVSSRC(&(pAsm->S[2].src));
4568 noswizzle_PVSSRC(&(pAsm->S[2].src));
4569
4570 if( GL_FALSE == next_ins(pAsm) )
4571 {
4572 return GL_FALSE;
4573 }
4574
4575
4576 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4577 {
4578 if( GL_FALSE == assemble_dst(pAsm) )
4579 {
4580 return GL_FALSE;
4581 }
4582
4583 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4584
4585 // Use tmp as source
4586 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4587 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4588 pAsm->S[0].src.reg = tmp;
4589
4590 noneg_PVSSRC(&(pAsm->S[0].src));
4591 noswizzle_PVSSRC(&(pAsm->S[0].src));
4592
4593 if( GL_FALSE == next_ins(pAsm) )
4594 {
4595 return GL_FALSE;
4596 }
4597 }
4598
4599 return GL_TRUE;
4600 }
4601
4602 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4603 {
4604 return GL_TRUE;
4605 }
4606
4607 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
4608 {
4609 switch (uReason)
4610 {
4611 case FC_PUSH_VPM:
4612 pAsm->CALLSTACK[pAsm->CALLSP].current--;
4613 break;
4614 case FC_PUSH_WQM:
4615 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4616 break;
4617 case FC_LOOP:
4618 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4619 break;
4620 case FC_REP:
4621 /* TODO : for 16 vp asic, should -= 2; */
4622 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
4623 break;
4624 };
4625 }
4626
4627 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
4628 {
4629 if(GL_TRUE == bCheckMaxOnly)
4630 {
4631 switch (uReason)
4632 {
4633 case FC_PUSH_VPM:
4634 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
4635 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4636 {
4637 pAsm->CALLSTACK[pAsm->CALLSP].max =
4638 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
4639 }
4640 break;
4641 case FC_PUSH_WQM:
4642 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
4643 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4644 {
4645 pAsm->CALLSTACK[pAsm->CALLSP].max =
4646 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
4647 }
4648 break;
4649 }
4650 return;
4651 }
4652
4653 switch (uReason)
4654 {
4655 case FC_PUSH_VPM:
4656 pAsm->CALLSTACK[pAsm->CALLSP].current++;
4657 break;
4658 case FC_PUSH_WQM:
4659 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4660 break;
4661 case FC_LOOP:
4662 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
4663 break;
4664 case FC_REP:
4665 /* TODO : for 16 vp asic, should += 2; */
4666 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
4667 break;
4668 };
4669
4670 if(pAsm->CALLSTACK[pAsm->CALLSP].current
4671 > pAsm->CALLSTACK[pAsm->CALLSP].max)
4672 {
4673 pAsm->CALLSTACK[pAsm->CALLSP].max =
4674 pAsm->CALLSTACK[pAsm->CALLSP].current;
4675 }
4676 }
4677
4678 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
4679 {
4680 if(GL_FALSE == add_cf_instruction(pAsm) )
4681 {
4682 return GL_FALSE;
4683 }
4684
4685 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4686 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4687 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4688
4689 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4690 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4691 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4692 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4693
4694 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4695
4696 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
4697
4698 return GL_TRUE;
4699 }
4700
4701 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
4702 {
4703 if(GL_FALSE == add_cf_instruction(pAsm) )
4704 {
4705 return GL_FALSE;
4706 }
4707
4708 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4709 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4710 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4711
4712 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4713 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4714 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4715
4716 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4717
4718 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4719 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4720
4721 return GL_TRUE;
4722 }
4723
4724 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
4725 {
4726 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4727
4728 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4729
4730
4731 if(GL_FALSE == add_cf_instruction(pAsm) )
4732 {
4733 return GL_FALSE;
4734 }
4735
4736 if(GL_TRUE != bHasElse)
4737 {
4738 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4739 }
4740 else
4741 {
4742 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4743 }
4744 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4745 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4746
4747 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4748 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4749 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4750 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4751
4752 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4753
4754 pAsm->FCSP++;
4755 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
4756 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4757 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
4758 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4759
4760 #ifndef USE_CF_FOR_POP_AFTER
4761 if(GL_TRUE != bHasElse)
4762 {
4763 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4764 }
4765 #endif /* USE_CF_FOR_POP_AFTER */
4766
4767 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
4768
4769 return GL_TRUE;
4770 }
4771
4772 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
4773 {
4774 if(GL_FALSE == add_cf_instruction(pAsm) )
4775 {
4776 return GL_FALSE;
4777 }
4778
4779 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
4780 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4781 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4782
4783 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4784 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4785 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
4786 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4787
4788 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4789
4790 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
4791 0,
4792 sizeof(R700ControlFlowGenericClause *) );
4793 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
4794 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
4795
4796 #ifndef USE_CF_FOR_POP_AFTER
4797 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
4798 #endif /* USE_CF_FOR_POP_AFTER */
4799
4800 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
4801
4802 return GL_TRUE;
4803 }
4804
4805 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
4806 {
4807 #ifdef USE_CF_FOR_POP_AFTER
4808 pops(pAsm, 1);
4809 #endif /* USE_CF_FOR_POP_AFTER */
4810
4811 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
4812
4813 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
4814 {
4815 /* no else in between */
4816 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4817 }
4818 else
4819 {
4820 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
4821 }
4822
4823 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
4824 {
4825 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
4826 }
4827
4828 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
4829 {
4830 radeon_error("if/endif in shader code are not paired. \n");
4831 return GL_FALSE;
4832 }
4833
4834 pAsm->FCSP--;
4835
4836 decreaseCurrent(pAsm, FC_PUSH_VPM);
4837
4838 return GL_TRUE;
4839 }
4840
4841 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
4842 {
4843 if(GL_FALSE == add_cf_instruction(pAsm) )
4844 {
4845 return GL_FALSE;
4846 }
4847
4848
4849 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
4850 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4851 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4852
4853 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4854 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4855 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
4856 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4857
4858 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4859
4860 pAsm->FCSP++;
4861 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
4862 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
4863 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
4864 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
4865 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
4866
4867 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
4868
4869 return GL_TRUE;
4870 }
4871
4872 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
4873 {
4874 #ifdef USE_CF_FOR_CONTINUE_BREAK
4875
4876 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4877
4878 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4879
4880 unsigned int unFCSP;
4881 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
4882 {
4883 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4884 {
4885 break;
4886 }
4887 }
4888 if(0 == FC_LOOP)
4889 {
4890 radeon_error("Break is not inside loop/endloop pair.\n");
4891 return GL_FALSE;
4892 }
4893
4894 if(GL_FALSE == add_cf_instruction(pAsm) )
4895 {
4896 return GL_FALSE;
4897 }
4898
4899
4900 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4901 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4902 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4903
4904 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4905 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4906 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
4907
4908 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4909
4910 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4911
4912 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
4913 (void *)pAsm->fc_stack[unFCSP].mid,
4914 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
4915 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
4916 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
4917 pAsm->fc_stack[unFCSP].unNumMid++;
4918
4919 if(GL_FALSE == add_cf_instruction(pAsm) )
4920 {
4921 return GL_FALSE;
4922 }
4923
4924 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4925 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4926 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4927
4928 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4929 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4930 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
4931
4932 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4933
4934 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4935 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
4936
4937 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
4938
4939 #endif //USE_CF_FOR_CONTINUE_BREAK
4940 return GL_TRUE;
4941 }
4942
4943 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
4944 {
4945 #ifdef USE_CF_FOR_CONTINUE_BREAK
4946 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
4947
4948 assemble_LOGIC_PRED(pAsm, SQ_OP2_INST_PRED_SETNE);
4949
4950 unsigned int unFCSP;
4951 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
4952 {
4953 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
4954 {
4955 break;
4956 }
4957 }
4958 if(0 == FC_LOOP)
4959 {
4960 radeon_error("Continue is not inside loop/endloop pair.\n");
4961 return GL_FALSE;
4962 }
4963
4964 if(GL_FALSE == add_cf_instruction(pAsm) )
4965 {
4966 return GL_FALSE;
4967 }
4968
4969
4970 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4971 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4972 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4973
4974 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4975 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4976 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
4977
4978 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4979
4980 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4981
4982 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
4983 (void *)pAsm->fc_stack[unFCSP].mid,
4984 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
4985 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
4986 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
4987 pAsm->fc_stack[unFCSP].unNumMid++;
4988
4989 if(GL_FALSE == add_cf_instruction(pAsm) )
4990 {
4991 return GL_FALSE;
4992 }
4993
4994 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
4995 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4996 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4997
4998 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4999 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5000 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5001
5002 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5003
5004 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5005 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5006
5007 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5008
5009 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5010
5011 return GL_TRUE;
5012 }
5013
5014 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
5015 {
5016 GLuint i;
5017
5018 if(GL_FALSE == add_cf_instruction(pAsm) )
5019 {
5020 return GL_FALSE;
5021 }
5022
5023
5024 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5025 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5026 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5027
5028 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5029 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5030 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
5031 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5032
5033 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5034
5035 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
5036 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5037
5038 #ifdef USE_CF_FOR_CONTINUE_BREAK
5039 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
5040 {
5041 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
5042 }
5043 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5044 {
5045 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5046 }
5047 #endif
5048
5049 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
5050 {
5051 radeon_error("loop/endloop in shader code are not paired. \n");
5052 return GL_FALSE;
5053 }
5054
5055 GLuint unFCSP;
5056 GLuint unIF = 0;
5057 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
5058 {
5059 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5060 {
5061 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5062 {
5063 breakLoopOnFlag(pAsm, unFCSP);
5064 break;
5065 }
5066 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5067 {
5068 unIF++;
5069 }
5070 }
5071 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
5072 {
5073 #ifdef USE_CF_FOR_POP_AFTER
5074 returnOnFlag(pAsm, unIF);
5075 #else
5076 returnOnFlag(pAsm, 0);
5077 #endif /* USE_CF_FOR_POP_AFTER */
5078 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
5079 }
5080 }
5081
5082 pAsm->FCSP--;
5083
5084 decreaseCurrent(pAsm, FC_LOOP);
5085
5086 return GL_TRUE;
5087 }
5088
5089 void add_return_inst(r700_AssemblerBase *pAsm)
5090 {
5091 if(GL_FALSE == add_cf_instruction(pAsm) )
5092 {
5093 return;
5094 }
5095 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5096 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5097 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5098 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5099
5100 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5101 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5102 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
5103 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5104
5105 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5106 }
5107
5108 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex, GLuint uiIL_Shift)
5109 {
5110 /* Put in sub */
5111 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
5112 {
5113 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
5114 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
5115 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
5116 if(NULL == pAsm->subs)
5117 {
5118 return GL_FALSE;
5119 }
5120 pAsm->unSubArraySize += 10;
5121 }
5122
5123 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex + uiIL_Shift;
5124 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
5125 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
5126 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
5127
5128 pAsm->CALLSP++;
5129 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
5130 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
5131 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5132 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5133 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
5134 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
5135 SetActiveCFlist(pAsm->pR700Shader,
5136 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5137
5138 pAsm->unSubArrayPointer++;
5139
5140 /* start sub */
5141 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5142
5143 pAsm->FCSP++;
5144 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
5145
5146 checkStackDepth(pAsm, FC_REP, GL_FALSE);
5147
5148 return GL_TRUE;
5149 }
5150
5151 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5152 {
5153 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
5154 {
5155 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5156 return GL_FALSE;
5157 }
5158
5159 /* copy max to sub structure */
5160 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
5161 = pAsm->CALLSTACK[pAsm->CALLSP].max;
5162
5163 decreaseCurrent(pAsm, FC_REP);
5164
5165 pAsm->CALLSP--;
5166 SetActiveCFlist(pAsm->pR700Shader,
5167 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5168
5169 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5170
5171 pAsm->FCSP--;
5172
5173 return GL_TRUE;
5174 }
5175
5176 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5177 {
5178 GLuint unIF = 0;
5179
5180 if(pAsm->CALLSP > 0)
5181 { /* in sub */
5182 GLuint unFCSP;
5183 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5184 {
5185 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5186 {
5187 setRetInLoopFlag(pAsm, SQ_SEL_1);
5188 breakLoopOnFlag(pAsm, unFCSP);
5189 pAsm->unCFflags |= LOOPRET_FLAGS;
5190
5191 return GL_TRUE;
5192 }
5193 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5194 {
5195 unIF++;
5196 }
5197 }
5198 }
5199
5200 #ifdef USE_CF_FOR_POP_AFTER
5201 if(unIF > 0)
5202 {
5203 pops(pAsm, unIF);
5204 }
5205 #endif /* USE_CF_FOR_POP_AFTER */
5206
5207 add_return_inst(pAsm);
5208
5209 return GL_TRUE;
5210 }
5211
5212 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5213 GLint nILindex,
5214 GLuint uiIL_Shift,
5215 GLuint uiNumberInsts,
5216 struct prog_instruction *pILInst,
5217 PRESUB_DESC * pPresubDesc)
5218 {
5219 GLint uiIL_Offset;
5220
5221 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5222
5223 if(GL_FALSE == add_cf_instruction(pAsm) )
5224 {
5225 return GL_FALSE;
5226 }
5227
5228 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5229 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5230 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5231 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5232
5233 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5234 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5235 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5236 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5237
5238 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5239
5240 /* Put in caller */
5241 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5242 {
5243 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5244 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5245 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5246 if(NULL == pAsm->callers)
5247 {
5248 return GL_FALSE;
5249 }
5250 pAsm->unCallerArraySize += 10;
5251 }
5252
5253 uiIL_Offset = nILindex + uiIL_Shift;
5254 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = uiIL_Offset;
5255 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5256
5257 pAsm->callers[pAsm->unCallerArrayPointer].finale_cf_ptr = NULL;
5258 pAsm->callers[pAsm->unCallerArrayPointer].prelude_cf_ptr = NULL;
5259
5260 pAsm->unCallerArrayPointer++;
5261
5262 int j;
5263 GLuint max;
5264 GLuint unSubID;
5265 GLboolean bRet;
5266 for(j=0; j<pAsm->unSubArrayPointer; j++)
5267 {
5268 if(uiIL_Offset == pAsm->subs[j].subIL_Offset)
5269 { /* compiled before */
5270
5271 max = pAsm->subs[j].unStackDepthMax
5272 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5273 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5274 {
5275 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5276 }
5277
5278 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5279 return GL_TRUE;
5280 }
5281 }
5282
5283 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5284 unSubID = pAsm->unSubArrayPointer;
5285
5286 bRet = AssembleInstr(nILindex, uiIL_Shift, uiNumberInsts, pILInst, pAsm);
5287
5288 if(GL_TRUE == bRet)
5289 {
5290 max = pAsm->subs[unSubID].unStackDepthMax
5291 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5292 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5293 {
5294 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5295 }
5296
5297 pAsm->subs[unSubID].pPresubDesc = pPresubDesc;
5298 }
5299
5300 return bRet;
5301 }
5302
5303 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5304 {
5305 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5306
5307 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5308 pAsm->D.dst.op3 = 0;
5309 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5310 pAsm->D.dst.reg = pAsm->flag_reg_index;
5311 pAsm->D.dst.writex = 1;
5312 pAsm->D.dst.writey = 0;
5313 pAsm->D.dst.writez = 0;
5314 pAsm->D.dst.writew = 0;
5315 pAsm->D2.dst2.literal_slots = 1;
5316 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5317 pAsm->D.dst.predicated = 0;
5318 /* in reloc where dislink flag init inst, only one slot alu inst is handled. */
5319 pAsm->D.dst.math = 1; /* TODO : not math really, but one channel op, more generic alu assembler needed */
5320 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5321 #if 0
5322 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5323 //pAsm->S[0].src.reg = 0;
5324 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5325 noneg_PVSSRC(&(pAsm->S[0].src));
5326 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5327 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5328 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5329 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5330
5331 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5332 {
5333 return GL_FALSE;
5334 }
5335 #else
5336 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5337 pAsm->S[0].src.reg = 0;
5338 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5339 noneg_PVSSRC(&(pAsm->S[0].src));
5340 pAsm->S[0].src.swizzlex = flagValue;
5341 pAsm->S[0].src.swizzley = flagValue;
5342 pAsm->S[0].src.swizzlez = flagValue;
5343 pAsm->S[0].src.swizzlew = flagValue;
5344
5345 if( GL_FALSE == next_ins(pAsm) )
5346 {
5347 return GL_FALSE;
5348 }
5349 #endif
5350
5351 return GL_TRUE;
5352 }
5353
5354 GLboolean testFlag(r700_AssemblerBase *pAsm)
5355 {
5356 /*GLfloat fLiteral[2] = {0.1, 0.0};*/
5357
5358 //Test flag
5359 GLuint tmp = gethelpr(pAsm);
5360 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5361
5362 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5363 pAsm->D.dst.math = 1;
5364 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5365 pAsm->D.dst.reg = tmp;
5366 pAsm->D.dst.writex = 1;
5367 pAsm->D.dst.writey = 0;
5368 pAsm->D.dst.writez = 0;
5369 pAsm->D.dst.writew = 0;
5370 pAsm->D2.dst2.literal_slots = 1;
5371 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5372 pAsm->D.dst.predicated = 1;
5373 pAsm->D2.dst2.index_mode = SQ_INDEX_LOOP; /* Check this ! */
5374
5375 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5376 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5377 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5378 noneg_PVSSRC(&(pAsm->S[0].src));
5379 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5380 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5381 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5382 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5383 #if 0
5384 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5385 //pAsm->S[1].src.reg = 0;
5386 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5387 noneg_PVSSRC(&(pAsm->S[1].src));
5388 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5389 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5390 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5391 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5392
5393 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5394 {
5395 return GL_FALSE;
5396 }
5397 #else
5398 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5399 pAsm->S[1].src.reg = 0;
5400 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5401 noneg_PVSSRC(&(pAsm->S[1].src));
5402 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5403 pAsm->S[1].src.swizzley = SQ_SEL_1;
5404 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5405 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5406
5407 if( GL_FALSE == next_ins(pAsm) )
5408 {
5409 return GL_FALSE;
5410 }
5411 #endif
5412
5413 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5414
5415 return GL_TRUE;
5416 }
5417
5418 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
5419 {
5420 testFlag(pAsm);
5421 jumpToOffest(pAsm, 1, 4);
5422 setRetInLoopFlag(pAsm, SQ_SEL_0);
5423 pops(pAsm, unIF + 1);
5424 add_return_inst(pAsm);
5425
5426 return GL_TRUE;
5427 }
5428
5429 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5430 {
5431 testFlag(pAsm);
5432
5433 //break
5434 if(GL_FALSE == add_cf_instruction(pAsm) )
5435 {
5436 return GL_FALSE;
5437 }
5438
5439 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5440 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5441 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5442
5443 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5444 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5445 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5446 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5447
5448 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5449
5450 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5451 (void *)pAsm->fc_stack[unFCSP].mid,
5452 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5453 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5454 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5455 pAsm->fc_stack[unFCSP].unNumMid++;
5456
5457 pops(pAsm, 1);
5458
5459 return GL_TRUE;
5460 }
5461
5462 GLboolean AssembleInstr(GLuint uiFirstInst,
5463 GLuint uiIL_Shift,
5464 GLuint uiNumberInsts,
5465 struct prog_instruction *pILInst,
5466 r700_AssemblerBase *pR700AsmCode)
5467 {
5468 GLuint i;
5469
5470 pR700AsmCode->pILInst = pILInst;
5471 for(i=uiFirstInst; i<uiNumberInsts; i++)
5472 {
5473 pR700AsmCode->uiCurInst = i;
5474
5475 #ifndef USE_CF_FOR_CONTINUE_BREAK
5476 if(OPCODE_BRK == pILInst[i+1].Opcode)
5477 {
5478 switch(pILInst[i].Opcode)
5479 {
5480 case OPCODE_SLE:
5481 pILInst[i].Opcode = OPCODE_SGT;
5482 break;
5483 case OPCODE_SLT:
5484 pILInst[i].Opcode = OPCODE_SGE;
5485 break;
5486 case OPCODE_SGE:
5487 pILInst[i].Opcode = OPCODE_SLT;
5488 break;
5489 case OPCODE_SGT:
5490 pILInst[i].Opcode = OPCODE_SLE;
5491 break;
5492 case OPCODE_SEQ:
5493 pILInst[i].Opcode = OPCODE_SNE;
5494 break;
5495 case OPCODE_SNE:
5496 pILInst[i].Opcode = OPCODE_SEQ;
5497 break;
5498 default:
5499 break;
5500 }
5501 }
5502 #endif
5503 if(pILInst[i].CondUpdate == 1)
5504 {
5505 /* remember dest register used for cond evaluation */
5506 /* XXX also handle PROGRAM_OUTPUT registers here? */
5507 pR700AsmCode->last_cond_register = pILInst[i].DstReg.Index;
5508 }
5509
5510 switch (pILInst[i].Opcode)
5511 {
5512 case OPCODE_ABS:
5513 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5514 return GL_FALSE;
5515 break;
5516 case OPCODE_ADD:
5517 case OPCODE_SUB:
5518 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5519 return GL_FALSE;
5520 break;
5521
5522 case OPCODE_ARL:
5523 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5524 return GL_FALSE;
5525 break;
5526 case OPCODE_ARR:
5527 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5528 //if ( GL_FALSE == assemble_BAD("ARR") )
5529 return GL_FALSE;
5530 break;
5531
5532 case OPCODE_CMP:
5533 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5534 return GL_FALSE;
5535 break;
5536 case OPCODE_COS:
5537 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_COS) )
5538 return GL_FALSE;
5539 break;
5540
5541 case OPCODE_DP3:
5542 case OPCODE_DP4:
5543 case OPCODE_DPH:
5544 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5545 return GL_FALSE;
5546 break;
5547
5548 case OPCODE_DST:
5549 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5550 return GL_FALSE;
5551 break;
5552
5553 case OPCODE_EX2:
5554 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5555 return GL_FALSE;
5556 break;
5557 case OPCODE_EXP:
5558 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5559 return GL_FALSE;
5560 break;
5561
5562 case OPCODE_FLR:
5563 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5564 return GL_FALSE;
5565 break;
5566 //case OP_FLR_INT: ;
5567
5568 // if ( GL_FALSE == assemble_FLR_INT() )
5569 // return GL_FALSE;
5570 // break;
5571
5572 case OPCODE_FRC:
5573 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5574 return GL_FALSE;
5575 break;
5576
5577 case OPCODE_KIL:
5578 case OPCODE_KIL_NV:
5579 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
5580 return GL_FALSE;
5581 break;
5582 case OPCODE_LG2:
5583 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5584 return GL_FALSE;
5585 break;
5586 case OPCODE_LIT:
5587 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5588 return GL_FALSE;
5589 break;
5590 case OPCODE_LRP:
5591 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5592 return GL_FALSE;
5593 break;
5594 case OPCODE_LOG:
5595 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5596 return GL_FALSE;
5597 break;
5598
5599 case OPCODE_MAD:
5600 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5601 return GL_FALSE;
5602 break;
5603 case OPCODE_MAX:
5604 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5605 return GL_FALSE;
5606 break;
5607 case OPCODE_MIN:
5608 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5609 return GL_FALSE;
5610 break;
5611
5612 case OPCODE_MOV:
5613 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5614 return GL_FALSE;
5615 break;
5616 case OPCODE_MUL:
5617 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5618 return GL_FALSE;
5619 break;
5620
5621 case OPCODE_NOISE1:
5622 {
5623 callPreSub(pR700AsmCode,
5624 GLSL_NOISE1,
5625 &noise1_presub,
5626 pILInst->DstReg.Index + pR700AsmCode->starting_temp_register_number,
5627 1);
5628 radeon_error("noise1: not yet supported shader instruction\n");
5629 };
5630 break;
5631 case OPCODE_NOISE2:
5632 radeon_error("noise2: not yet supported shader instruction\n");
5633 break;
5634 case OPCODE_NOISE3:
5635 radeon_error("noise3: not yet supported shader instruction\n");
5636 break;
5637 case OPCODE_NOISE4:
5638 radeon_error("noise4: not yet supported shader instruction\n");
5639 break;
5640
5641 case OPCODE_POW:
5642 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5643 return GL_FALSE;
5644 break;
5645 case OPCODE_RCP:
5646 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5647 return GL_FALSE;
5648 break;
5649 case OPCODE_RSQ:
5650 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5651 return GL_FALSE;
5652 break;
5653 case OPCODE_SIN:
5654 if ( GL_FALSE == assemble_TRIG(pR700AsmCode, SQ_OP2_INST_SIN) )
5655 return GL_FALSE;
5656 break;
5657 case OPCODE_SCS:
5658 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5659 return GL_FALSE;
5660 break;
5661
5662 case OPCODE_SEQ:
5663 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
5664 {
5665 return GL_FALSE;
5666 }
5667 break;
5668
5669 case OPCODE_SGT:
5670 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5671 {
5672 return GL_FALSE;
5673 }
5674 break;
5675
5676 case OPCODE_SGE:
5677 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
5678 {
5679 return GL_FALSE;
5680 }
5681 break;
5682
5683 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
5684 case OPCODE_SLT:
5685 {
5686 struct prog_src_register SrcRegSave[2];
5687 SrcRegSave[0] = pILInst[i].SrcReg[0];
5688 SrcRegSave[1] = pILInst[i].SrcReg[1];
5689 pILInst[i].SrcReg[0] = SrcRegSave[1];
5690 pILInst[i].SrcReg[1] = SrcRegSave[0];
5691 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5692 {
5693 pILInst[i].SrcReg[0] = SrcRegSave[0];
5694 pILInst[i].SrcReg[1] = SrcRegSave[1];
5695 return GL_FALSE;
5696 }
5697 pILInst[i].SrcReg[0] = SrcRegSave[0];
5698 pILInst[i].SrcReg[1] = SrcRegSave[1];
5699 }
5700 break;
5701
5702 case OPCODE_SLE:
5703 {
5704 struct prog_src_register SrcRegSave[2];
5705 SrcRegSave[0] = pILInst[i].SrcReg[0];
5706 SrcRegSave[1] = pILInst[i].SrcReg[1];
5707 pILInst[i].SrcReg[0] = SrcRegSave[1];
5708 pILInst[i].SrcReg[1] = SrcRegSave[0];
5709 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
5710 {
5711 pILInst[i].SrcReg[0] = SrcRegSave[0];
5712 pILInst[i].SrcReg[1] = SrcRegSave[1];
5713 return GL_FALSE;
5714 }
5715 pILInst[i].SrcReg[0] = SrcRegSave[0];
5716 pILInst[i].SrcReg[1] = SrcRegSave[1];
5717 }
5718 break;
5719
5720 case OPCODE_SNE:
5721 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
5722 {
5723 return GL_FALSE;
5724 }
5725 break;
5726
5727 //case OP_STP:
5728 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
5729 // return GL_FALSE;
5730 // break;
5731
5732 case OPCODE_SWZ:
5733 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5734 {
5735 return GL_FALSE;
5736 }
5737 else
5738 {
5739 if( (i+1)<uiNumberInsts )
5740 {
5741 if(OPCODE_END != pILInst[i+1].Opcode)
5742 {
5743 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
5744 {
5745 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
5746 }
5747 }
5748 }
5749 }
5750 break;
5751 case OPCODE_DDX:
5752 case OPCODE_DDY:
5753 case OPCODE_TEX:
5754 case OPCODE_TXB:
5755 case OPCODE_TXP:
5756 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
5757 return GL_FALSE;
5758 break;
5759
5760 case OPCODE_TRUNC:
5761 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
5762 return GL_FALSE;
5763 break;
5764
5765 case OPCODE_XPD:
5766 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
5767 return GL_FALSE;
5768 break;
5769
5770 case OPCODE_IF:
5771 {
5772 GLboolean bHasElse = GL_FALSE;
5773
5774 if(pILInst[pILInst[i].BranchTarget].Opcode == OPCODE_ELSE)
5775 {
5776 bHasElse = GL_TRUE;
5777 }
5778
5779 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
5780 {
5781 return GL_FALSE;
5782 }
5783 }
5784 break;
5785
5786 case OPCODE_ELSE :
5787 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
5788 return GL_FALSE;
5789 break;
5790
5791 case OPCODE_ENDIF:
5792 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
5793 return GL_FALSE;
5794 break;
5795
5796 case OPCODE_BGNLOOP:
5797 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
5798 {
5799 return GL_FALSE;
5800 }
5801 break;
5802
5803 case OPCODE_BRK:
5804 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
5805 {
5806 return GL_FALSE;
5807 }
5808 break;
5809
5810 case OPCODE_CONT:
5811 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
5812 {
5813 return GL_FALSE;
5814 }
5815 break;
5816
5817 case OPCODE_ENDLOOP:
5818 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
5819 {
5820 return GL_FALSE;
5821 }
5822 break;
5823
5824 case OPCODE_BGNSUB:
5825 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i, uiIL_Shift) )
5826 {
5827 return GL_FALSE;
5828 }
5829 break;
5830
5831 case OPCODE_RET:
5832 if( GL_FALSE == assemble_RET(pR700AsmCode) )
5833 {
5834 return GL_FALSE;
5835 }
5836 break;
5837
5838 case OPCODE_CAL:
5839 if( GL_FALSE == assemble_CAL(pR700AsmCode,
5840 pILInst[i].BranchTarget,
5841 uiIL_Shift,
5842 uiNumberInsts,
5843 pILInst,
5844 NULL) )
5845 {
5846 return GL_FALSE;
5847 }
5848 break;
5849
5850 //case OPCODE_EXPORT:
5851 // if ( GL_FALSE == assemble_EXPORT() )
5852 // return GL_FALSE;
5853 // break;
5854
5855 case OPCODE_ENDSUB:
5856 return assemble_ENDSUB(pR700AsmCode);
5857
5858 case OPCODE_END:
5859 //pR700AsmCode->uiCurInst = i;
5860 //This is to remaind that if in later exoort there is depth/stencil
5861 //export, we need a mov to re-arrange DST channel, where using a
5862 //psuedo inst, we will use this end inst to do it.
5863 return GL_TRUE;
5864
5865 default:
5866 radeon_error("internal: unknown instruction\n");
5867 return GL_FALSE;
5868 }
5869 }
5870
5871 return GL_TRUE;
5872 }
5873
5874 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
5875 {
5876 setRetInLoopFlag(pAsm, SQ_SEL_0);
5877 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5878 return GL_TRUE;
5879 }
5880
5881 GLboolean RelocProgram(r700_AssemblerBase * pAsm, struct gl_program * pILProg)
5882 {
5883 GLuint i;
5884 GLuint unCFoffset;
5885 TypedShaderList * plstCFmain;
5886 TypedShaderList * plstCFsub;
5887
5888 R700ShaderInstruction * pInst;
5889 R700ControlFlowGenericClause * pCFInst;
5890
5891 R700ControlFlowALUClause * pCF_ALU;
5892 R700ALUInstruction * pALU;
5893 GLuint unConstOffset = 0;
5894 GLuint unRegOffset;
5895 GLuint unMinRegIndex;
5896
5897 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
5898
5899 /* remove flags init if they are not used */
5900 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
5901 {
5902 R700ControlFlowALUClause * pCF_ALU;
5903 pInst = plstCFmain->pHead;
5904 while(pInst)
5905 {
5906 if(SIT_CF_ALU == pInst->m_ShaderInstType)
5907 {
5908 pCF_ALU = (R700ControlFlowALUClause *)pInst;
5909 if(0 == pCF_ALU->m_Word1.f.count)
5910 {
5911 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
5912 }
5913 else
5914 {
5915 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
5916
5917 pALU->m_pLinkedALUClause = NULL;
5918 pALU = (R700ALUInstruction *)(pALU->pNextInst);
5919 pALU->m_pLinkedALUClause = pCF_ALU;
5920 pCF_ALU->m_pLinkedALUInstruction = pALU;
5921
5922 pCF_ALU->m_Word1.f.count--;
5923 }
5924 break;
5925 }
5926 pInst = pInst->pNextInst;
5927 };
5928 }
5929
5930 if(pAsm->CALLSTACK[0].max > 0)
5931 {
5932 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
5933 }
5934
5935 if(0 == pAsm->unSubArrayPointer)
5936 {
5937 return GL_TRUE;
5938 }
5939
5940 unCFoffset = plstCFmain->uNumOfNode;
5941
5942 if(NULL != pILProg->Parameters)
5943 {
5944 unConstOffset = pILProg->Parameters->NumParameters;
5945 }
5946
5947 /* Reloc subs */
5948 for(i=0; i<pAsm->unSubArrayPointer; i++)
5949 {
5950 pAsm->subs[i].unCFoffset = unCFoffset;
5951 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
5952
5953 pInst = plstCFsub->pHead;
5954
5955 /* reloc instructions */
5956 while(pInst)
5957 {
5958 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
5959 {
5960 pCFInst = (R700ControlFlowGenericClause *)pInst;
5961
5962 switch (pCFInst->m_Word1.f.cf_inst)
5963 {
5964 case SQ_CF_INST_POP:
5965 case SQ_CF_INST_JUMP:
5966 case SQ_CF_INST_ELSE:
5967 case SQ_CF_INST_LOOP_END:
5968 case SQ_CF_INST_LOOP_START:
5969 case SQ_CF_INST_LOOP_START_NO_AL:
5970 case SQ_CF_INST_LOOP_CONTINUE:
5971 case SQ_CF_INST_LOOP_BREAK:
5972 pCFInst->m_Word0.f.addr += unCFoffset;
5973 break;
5974 default:
5975 break;
5976 }
5977 }
5978
5979 pInst->m_uIndex += unCFoffset;
5980
5981 pInst = pInst->pNextInst;
5982 };
5983
5984 if(NULL != pAsm->subs[i].pPresubDesc)
5985 {
5986 GLuint uNumSrc;
5987
5988 unMinRegIndex = pAsm->subs[i].pPresubDesc->pCompiledSub->MinRegIndex;
5989 unRegOffset = pAsm->subs[i].pPresubDesc->maxStartReg;
5990 unConstOffset += pAsm->subs[i].pPresubDesc->unConstantsStart;
5991
5992 pInst = plstCFsub->pHead;
5993 while(pInst)
5994 {
5995 if(SIT_CF_ALU == pInst->m_ShaderInstType)
5996 {
5997 pCF_ALU = (R700ControlFlowALUClause *)pInst;
5998
5999 pALU = pCF_ALU->m_pLinkedALUInstruction;
6000 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6001 {
6002 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
6003
6004 if(pALU->m_Word0.f.src0_sel < SQ_ALU_SRC_GPR_SIZE)
6005 {
6006 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
6007 }
6008 else if(pALU->m_Word0.f.src0_sel >= SQ_ALU_SRC_CFILE_BASE)
6009 {
6010 pALU->m_Word0.f.src0_sel += unConstOffset;
6011 }
6012
6013 if( ((pALU->m_Word1.val >> SQ_ALU_WORD1_OP3_ALU_INST_SHIFT) & 0x0000001F)
6014 >= SQ_OP3_INST_MUL_LIT )
6015 { /* op3 : 3 srcs */
6016 if(pALU->m_Word1_OP3.f.src2_sel < SQ_ALU_SRC_GPR_SIZE)
6017 {
6018 pALU->m_Word1_OP3.f.src2_sel = pALU->m_Word1_OP3.f.src2_sel + unRegOffset - unMinRegIndex;
6019 }
6020 else if(pALU->m_Word1_OP3.f.src2_sel >= SQ_ALU_SRC_CFILE_BASE)
6021 {
6022 pALU->m_Word1_OP3.f.src2_sel += unConstOffset;
6023 }
6024 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
6025 {
6026 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
6027 }
6028 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
6029 {
6030 pALU->m_Word0.f.src1_sel += unConstOffset;
6031 }
6032 }
6033 else
6034 {
6035 if(pAsm->bR6xx)
6036 {
6037 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f6.alu_inst, 0);
6038 }
6039 else
6040 {
6041 uNumSrc = r700GetNumOperands(pALU->m_Word1_OP2.f.alu_inst, 0);
6042 }
6043 if(2 == uNumSrc)
6044 { /* 2 srcs */
6045 if(pALU->m_Word0.f.src1_sel < SQ_ALU_SRC_GPR_SIZE)
6046 {
6047 pALU->m_Word0.f.src1_sel = pALU->m_Word0.f.src1_sel + unRegOffset - unMinRegIndex;
6048 }
6049 else if(pALU->m_Word0.f.src1_sel >= SQ_ALU_SRC_CFILE_BASE)
6050 {
6051 pALU->m_Word0.f.src1_sel += unConstOffset;
6052 }
6053 }
6054 }
6055 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6056 }
6057 }
6058 pInst = pInst->pNextInst;
6059 };
6060 }
6061
6062 /* Put sub into main */
6063 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
6064 plstCFmain->pTail = plstCFsub->pTail;
6065 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
6066
6067 unCFoffset += plstCFsub->uNumOfNode;
6068 }
6069
6070 /* reloc callers */
6071 for(i=0; i<pAsm->unCallerArrayPointer; i++)
6072 {
6073 pAsm->callers[i].cf_ptr->m_Word0.f.addr
6074 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
6075
6076 if(NULL != pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc)
6077 {
6078 unMinRegIndex = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->pCompiledSub->MinRegIndex;
6079 unRegOffset = pAsm->subs[pAsm->callers[i].subDescIndex].pPresubDesc->maxStartReg;
6080
6081 if(NULL != pAsm->callers[i].prelude_cf_ptr)
6082 {
6083 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].prelude_cf_ptr);
6084 pALU = pCF_ALU->m_pLinkedALUInstruction;
6085 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6086 {
6087 pALU->m_Word1.f.dst_gpr = pALU->m_Word1.f.dst_gpr + unRegOffset - unMinRegIndex;
6088 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6089 }
6090 }
6091 if(NULL != pAsm->callers[i].finale_cf_ptr)
6092 {
6093 pCF_ALU = (R700ControlFlowALUClause * )(pAsm->callers[i].finale_cf_ptr);
6094 pALU = pCF_ALU->m_pLinkedALUInstruction;
6095 for(int j=0; j<=pCF_ALU->m_Word1.f.count; j++)
6096 {
6097 pALU->m_Word0.f.src0_sel = pALU->m_Word0.f.src0_sel + unRegOffset - unMinRegIndex;
6098 pALU = (R700ALUInstruction*)(pALU->pNextInst);
6099 }
6100 }
6101 }
6102 }
6103
6104 return GL_TRUE;
6105 }
6106
6107 GLboolean callPreSub(r700_AssemblerBase* pAsm,
6108 LOADABLE_SCRIPT_SIGNITURE scriptSigniture,
6109 COMPILED_SUB * pCompiledSub,
6110 GLshort uOutReg,
6111 GLshort uNumValidSrc)
6112 {
6113 /* save assemble context */
6114 GLuint starting_temp_register_number_save;
6115 GLuint number_used_registers_save;
6116 GLuint uFirstHelpReg_save;
6117 GLuint uHelpReg_save;
6118 GLuint uiCurInst_save;
6119 struct prog_instruction *pILInst_save;
6120 PRESUB_DESC * pPresubDesc;
6121 GLboolean bRet;
6122 int i;
6123
6124 R700ControlFlowGenericClause* prelude_cf_ptr = NULL;
6125
6126 /* copy srcs to presub inputs */
6127 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6128 for(i=0; i<uNumValidSrc; i++)
6129 {
6130 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6131 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6132 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6133 pAsm->D.dst.reg = pCompiledSub->srcRegIndex[i];
6134 pAsm->D.dst.writex = 1;
6135 pAsm->D.dst.writey = 1;
6136 pAsm->D.dst.writez = 1;
6137 pAsm->D.dst.writew = 1;
6138
6139 if( GL_FALSE == assemble_src(pAsm, i, 0) )
6140 {
6141 return GL_FALSE;
6142 }
6143
6144 next_ins(pAsm);
6145 }
6146 if(uNumValidSrc > 0)
6147 {
6148 prelude_cf_ptr = pAsm->cf_current_alu_clause_ptr;
6149 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6150 }
6151
6152 /* browse thro existing presubs. */
6153 for(i=0; i<pAsm->unNumPresub; i++)
6154 {
6155 if(pAsm->presubs[i].sptSigniture == scriptSigniture)
6156 {
6157 break;
6158 }
6159 }
6160
6161 if(i == pAsm->unNumPresub)
6162 { /* not loaded yet */
6163 /* save assemble context */
6164 number_used_registers_save = pAsm->number_used_registers;
6165 uFirstHelpReg_save = pAsm->uFirstHelpReg;
6166 uHelpReg_save = pAsm->uHelpReg;
6167 starting_temp_register_number_save = pAsm->starting_temp_register_number;
6168 pILInst_save = pAsm->pILInst;
6169 uiCurInst_save = pAsm->uiCurInst;
6170
6171 /* alloc in presub */
6172 if( (pAsm->unNumPresub + 1) > pAsm->unPresubArraySize )
6173 {
6174 pAsm->presubs = (PRESUB_DESC*)_mesa_realloc( (void *)pAsm->presubs,
6175 sizeof(PRESUB_DESC) * pAsm->unPresubArraySize,
6176 sizeof(PRESUB_DESC) * (pAsm->unPresubArraySize + 4) );
6177 if(NULL == pAsm->presubs)
6178 {
6179 radeon_error("No memeory to allocate built in shader function description structures. \n");
6180 return GL_FALSE;
6181 }
6182 pAsm->unPresubArraySize += 4;
6183 }
6184
6185 pPresubDesc = &(pAsm->presubs[i]);
6186 pPresubDesc->sptSigniture = scriptSigniture;
6187
6188 /* constants offsets need to be final resolved at reloc. */
6189 if(0 == pAsm->unNumPresub)
6190 {
6191 pPresubDesc->unConstantsStart = 0;
6192 }
6193 else
6194 {
6195 pPresubDesc->unConstantsStart = pAsm->presubs[i-1].unConstantsStart
6196 + pAsm->presubs[i-1].pCompiledSub->NumParameters;
6197 }
6198
6199 pPresubDesc->pCompiledSub = pCompiledSub;
6200
6201 pPresubDesc->subIL_Shift = pAsm->unCurNumILInsts;
6202 pPresubDesc->maxStartReg = uFirstHelpReg_save;
6203 pAsm->unCurNumILInsts += pCompiledSub->NumInstructions;
6204
6205 pAsm->unNumPresub++;
6206
6207 /* setup new assemble context */
6208 pAsm->starting_temp_register_number = 0;
6209 pAsm->number_used_registers = pCompiledSub->NumTemporaries;
6210 pAsm->uFirstHelpReg = pAsm->number_used_registers;
6211 pAsm->uHelpReg = pAsm->uFirstHelpReg;
6212
6213 bRet = assemble_CAL(pAsm,
6214 0,
6215 pPresubDesc->subIL_Shift,
6216 pCompiledSub->NumInstructions,
6217 pCompiledSub->Instructions,
6218 pPresubDesc);
6219
6220
6221 pPresubDesc->number_used_registers = pAsm->number_used_registers;
6222
6223 /* restore assemble context */
6224 pAsm->number_used_registers = number_used_registers_save;
6225 pAsm->uFirstHelpReg = uFirstHelpReg_save;
6226 pAsm->uHelpReg = uHelpReg_save;
6227 pAsm->starting_temp_register_number = starting_temp_register_number_save;
6228 pAsm->pILInst = pILInst_save;
6229 pAsm->uiCurInst = uiCurInst_save;
6230 }
6231 else
6232 { /* was loaded */
6233 pPresubDesc = &(pAsm->presubs[i]);
6234
6235 bRet = assemble_CAL(pAsm,
6236 0,
6237 pPresubDesc->subIL_Shift,
6238 pCompiledSub->NumInstructions,
6239 pCompiledSub->Instructions,
6240 pPresubDesc);
6241 }
6242
6243 if(GL_FALSE == bRet)
6244 {
6245 radeon_error("Shader presub assemble failed. \n");
6246 }
6247 else
6248 {
6249 /* copy presub output to real dst */
6250 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6251 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6252
6253 if( GL_FALSE == assemble_dst(pAsm) )
6254 {
6255 return GL_FALSE;
6256 }
6257
6258 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6259 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
6260 pAsm->S[0].src.reg = pCompiledSub->dstRegIndex;
6261 pAsm->S[0].src.swizzlex = pCompiledSub->outputSwizzleX;
6262 pAsm->S[0].src.swizzley = pCompiledSub->outputSwizzleY;
6263 pAsm->S[0].src.swizzlez = pCompiledSub->outputSwizzleZ;
6264 pAsm->S[0].src.swizzlew = pCompiledSub->outputSwizzleW;
6265
6266 next_ins(pAsm);
6267
6268 pAsm->callers[pAsm->unCallerArrayPointer - 1].finale_cf_ptr = pAsm->cf_current_alu_clause_ptr;
6269 pAsm->callers[pAsm->unCallerArrayPointer - 1].prelude_cf_ptr = prelude_cf_ptr;
6270 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
6271 }
6272
6273 if( (pPresubDesc->number_used_registers + pAsm->uFirstHelpReg) > pAsm->number_used_registers )
6274 {
6275 pAsm->number_used_registers = pPresubDesc->number_used_registers + pAsm->uFirstHelpReg;
6276 }
6277 if(pAsm->uFirstHelpReg > pPresubDesc->maxStartReg)
6278 {
6279 pPresubDesc->maxStartReg = pAsm->uFirstHelpReg;
6280 }
6281
6282 return bRet;
6283 }
6284
6285 GLboolean Process_Export(r700_AssemblerBase* pAsm,
6286 GLuint type,
6287 GLuint export_starting_index,
6288 GLuint export_count,
6289 GLuint starting_register_number,
6290 GLboolean is_depth_export)
6291 {
6292 unsigned char ucWriteMask;
6293
6294 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
6295 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
6296
6297 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
6298
6299 switch (type)
6300 {
6301 case SQ_EXPORT_PIXEL:
6302 if(GL_TRUE == is_depth_export)
6303 {
6304 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
6305 }
6306 else
6307 {
6308 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
6309 }
6310 break;
6311
6312 case SQ_EXPORT_POS:
6313 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
6314 break;
6315
6316 case SQ_EXPORT_PARAM:
6317 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
6318 break;
6319
6320 default:
6321 radeon_error("Unknown export type: %d\n", type);
6322 return GL_FALSE;
6323 break;
6324 }
6325
6326 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
6327
6328 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
6329 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
6330 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
6331
6332 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
6333 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
6334 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6335 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
6336 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6337 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
6338
6339 if (export_count == 1)
6340 {
6341 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
6342 /* exports Z as a float into Red channel */
6343 if (GL_TRUE == is_depth_export)
6344 ucWriteMask = 0x1;
6345
6346 if( (ucWriteMask & 0x1) != 0)
6347 {
6348 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6349 }
6350 else
6351 {
6352 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
6353 }
6354 if( ((ucWriteMask>>1) & 0x1) != 0)
6355 {
6356 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6357 }
6358 else
6359 {
6360 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
6361 }
6362 if( ((ucWriteMask>>2) & 0x1) != 0)
6363 {
6364 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6365 }
6366 else
6367 {
6368 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
6369 }
6370 if( ((ucWriteMask>>3) & 0x1) != 0)
6371 {
6372 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6373 }
6374 else
6375 {
6376 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
6377 }
6378 }
6379 else
6380 {
6381 // This should only be used if all components for all registers have been written
6382 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6383 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6384 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6385 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6386 }
6387
6388 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
6389
6390 return GL_TRUE;
6391 }
6392
6393 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
6394 {
6395 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
6396 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
6397
6398 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6399
6400 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6401
6402 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6403 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6404 pAsm->D.dst.reg = pAsm->depth_export_register_number;
6405
6406 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
6407
6408 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6409 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6410 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
6411
6412 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
6413
6414 noneg_PVSSRC(&(pAsm->S[0].src));
6415
6416 if( GL_FALSE == next_ins(pAsm) )
6417 {
6418 return GL_FALSE;
6419 }
6420
6421 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
6422
6423 return GL_TRUE;
6424 }
6425
6426 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
6427 GLbitfield OutputsWritten)
6428 {
6429 unsigned int unBit;
6430 GLuint export_count = 0;
6431
6432 if(pR700AsmCode->depth_export_register_number >= 0)
6433 {
6434 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
6435 {
6436 return GL_FALSE;
6437 }
6438 }
6439
6440 unBit = 1 << FRAG_RESULT_COLOR;
6441 if(OutputsWritten & unBit)
6442 {
6443 if( GL_FALSE == Process_Export(pR700AsmCode,
6444 SQ_EXPORT_PIXEL,
6445 0,
6446 1,
6447 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
6448 GL_FALSE) )
6449 {
6450 return GL_FALSE;
6451 }
6452 export_count++;
6453 }
6454 unBit = 1 << FRAG_RESULT_DEPTH;
6455 if(OutputsWritten & unBit)
6456 {
6457 if( GL_FALSE == Process_Export(pR700AsmCode,
6458 SQ_EXPORT_PIXEL,
6459 0,
6460 1,
6461 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
6462 GL_TRUE))
6463 {
6464 return GL_FALSE;
6465 }
6466 export_count++;
6467 }
6468 /* Need to export something, otherwise we'll hang
6469 * results are undefined anyway */
6470 if(export_count == 0)
6471 {
6472 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
6473 }
6474
6475 if(pR700AsmCode->cf_last_export_ptr != NULL)
6476 {
6477 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6478 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6479 }
6480
6481 return GL_TRUE;
6482 }
6483
6484 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6485 GLbitfield OutputsWritten)
6486 {
6487 unsigned int unBit;
6488 unsigned int i;
6489
6490 GLuint export_starting_index = 0;
6491 GLuint export_count = pR700AsmCode->number_of_exports;
6492
6493 unBit = 1 << VERT_RESULT_HPOS;
6494 if(OutputsWritten & unBit)
6495 {
6496 if( GL_FALSE == Process_Export(pR700AsmCode,
6497 SQ_EXPORT_POS,
6498 export_starting_index,
6499 1,
6500 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6501 GL_FALSE) )
6502 {
6503 return GL_FALSE;
6504 }
6505
6506 export_count--;
6507
6508 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6509 }
6510
6511 pR700AsmCode->number_of_exports = export_count;
6512
6513 unBit = 1 << VERT_RESULT_COL0;
6514 if(OutputsWritten & unBit)
6515 {
6516 if( GL_FALSE == Process_Export(pR700AsmCode,
6517 SQ_EXPORT_PARAM,
6518 export_starting_index,
6519 1,
6520 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6521 GL_FALSE) )
6522 {
6523 return GL_FALSE;
6524 }
6525
6526 export_starting_index++;
6527 }
6528
6529 unBit = 1 << VERT_RESULT_COL1;
6530 if(OutputsWritten & unBit)
6531 {
6532 if( GL_FALSE == Process_Export(pR700AsmCode,
6533 SQ_EXPORT_PARAM,
6534 export_starting_index,
6535 1,
6536 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6537 GL_FALSE) )
6538 {
6539 return GL_FALSE;
6540 }
6541
6542 export_starting_index++;
6543 }
6544
6545 unBit = 1 << VERT_RESULT_FOGC;
6546 if(OutputsWritten & unBit)
6547 {
6548 if( GL_FALSE == Process_Export(pR700AsmCode,
6549 SQ_EXPORT_PARAM,
6550 export_starting_index,
6551 1,
6552 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6553 GL_FALSE) )
6554 {
6555 return GL_FALSE;
6556 }
6557
6558 export_starting_index++;
6559 }
6560
6561 for(i=0; i<8; i++)
6562 {
6563 unBit = 1 << (VERT_RESULT_TEX0 + i);
6564 if(OutputsWritten & unBit)
6565 {
6566 if( GL_FALSE == Process_Export(pR700AsmCode,
6567 SQ_EXPORT_PARAM,
6568 export_starting_index,
6569 1,
6570 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6571 GL_FALSE) )
6572 {
6573 return GL_FALSE;
6574 }
6575
6576 export_starting_index++;
6577 }
6578 }
6579
6580 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6581 {
6582 unBit = 1 << i;
6583 if(OutputsWritten & unBit)
6584 {
6585 if( GL_FALSE == Process_Export(pR700AsmCode,
6586 SQ_EXPORT_PARAM,
6587 export_starting_index,
6588 1,
6589 pR700AsmCode->ucVP_OutputMap[i],
6590 GL_FALSE) )
6591 {
6592 return GL_FALSE;
6593 }
6594
6595 export_starting_index++;
6596 }
6597 }
6598
6599 // At least one param should be exported
6600 if (export_count)
6601 {
6602 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6603 }
6604 else
6605 {
6606 if( GL_FALSE == Process_Export(pR700AsmCode,
6607 SQ_EXPORT_PARAM,
6608 0,
6609 1,
6610 pR700AsmCode->starting_export_register_number,
6611 GL_FALSE) )
6612 {
6613 return GL_FALSE;
6614 }
6615
6616 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6617 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6618 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6619 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6620 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6621 }
6622
6623 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6624
6625 return GL_TRUE;
6626 }
6627
6628 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6629 {
6630 FREE(pR700AsmCode->pucOutMask);
6631 FREE(pR700AsmCode->pInstDeps);
6632
6633 if(NULL != pR700AsmCode->subs)
6634 {
6635 FREE(pR700AsmCode->subs);
6636 }
6637 if(NULL != pR700AsmCode->callers)
6638 {
6639 FREE(pR700AsmCode->callers);
6640 }
6641
6642 if(NULL != pR700AsmCode->presubs)
6643 {
6644 FREE(pR700AsmCode->presubs);
6645 }
6646
6647 return GL_TRUE;
6648 }
6649