r600 : Initial version of glsl fc.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 #define USE_CF_FOR_CONTINUE_BREAK 1
42
43 BITS addrmode_PVSDST(PVSDST * pPVSDST)
44 {
45 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
46 }
47
48 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
49 {
50 pPVSDST->addrmode0 = addrmode & 1;
51 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
52 }
53
54 void nomask_PVSDST(PVSDST * pPVSDST)
55 {
56 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
57 }
58
59 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
60 {
61 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
62 }
63
64 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
65 {
66 pPVSSRC->addrmode0 = addrmode & 1;
67 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
68 }
69
70
71 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
72 {
73 pPVSSRC->swizzlex =
74 pPVSSRC->swizzley =
75 pPVSSRC->swizzlez =
76 pPVSSRC->swizzlew = swz;
77 }
78
79 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
80 {
81 pPVSSRC->swizzlex = SQ_SEL_X;
82 pPVSSRC->swizzley = SQ_SEL_Y;
83 pPVSSRC->swizzlez = SQ_SEL_Z;
84 pPVSSRC->swizzlew = SQ_SEL_W;
85 }
86
87 void
88 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
89 {
90 switch (x)
91 {
92 case SQ_SEL_X: x = pPVSSRC->swizzlex;
93 break;
94 case SQ_SEL_Y: x = pPVSSRC->swizzley;
95 break;
96 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
97 break;
98 case SQ_SEL_W: x = pPVSSRC->swizzlew;
99 break;
100 default:;
101 }
102
103 switch (y)
104 {
105 case SQ_SEL_X: y = pPVSSRC->swizzlex;
106 break;
107 case SQ_SEL_Y: y = pPVSSRC->swizzley;
108 break;
109 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
110 break;
111 case SQ_SEL_W: y = pPVSSRC->swizzlew;
112 break;
113 default:;
114 }
115
116 switch (z)
117 {
118 case SQ_SEL_X: z = pPVSSRC->swizzlex;
119 break;
120 case SQ_SEL_Y: z = pPVSSRC->swizzley;
121 break;
122 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
123 break;
124 case SQ_SEL_W: z = pPVSSRC->swizzlew;
125 break;
126 default:;
127 }
128
129 switch (w)
130 {
131 case SQ_SEL_X: w = pPVSSRC->swizzlex;
132 break;
133 case SQ_SEL_Y: w = pPVSSRC->swizzley;
134 break;
135 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
136 break;
137 case SQ_SEL_W: w = pPVSSRC->swizzlew;
138 break;
139 default:;
140 }
141
142 pPVSSRC->swizzlex = x;
143 pPVSSRC->swizzley = y;
144 pPVSSRC->swizzlez = z;
145 pPVSSRC->swizzlew = w;
146 }
147
148 void neg_PVSSRC(PVSSRC* pPVSSRC)
149 {
150 pPVSSRC->negx = 1;
151 pPVSSRC->negy = 1;
152 pPVSSRC->negz = 1;
153 pPVSSRC->negw = 1;
154 }
155
156 void noneg_PVSSRC(PVSSRC* pPVSSRC)
157 {
158 pPVSSRC->negx = 0;
159 pPVSSRC->negy = 0;
160 pPVSSRC->negz = 0;
161 pPVSSRC->negw = 0;
162 }
163
164 // negate argument (for SUB instead of ADD and alike)
165 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
166 {
167 pPVSSRC->negx = !pPVSSRC->negx;
168 pPVSSRC->negy = !pPVSSRC->negy;
169 pPVSSRC->negz = !pPVSSRC->negz;
170 pPVSSRC->negw = !pPVSSRC->negw;
171 }
172
173 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
174 {
175 switch (c)
176 {
177 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
178 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
179 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
180 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
181 default:;
182 }
183 }
184
185 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
186 {
187 switch (c)
188 {
189 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
190 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
191 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
192 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
193 default:;
194 }
195 }
196
197 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
198 {
199 return (pOutVTXFmt0->point_size |
200 pOutVTXFmt0->edge_flag |
201 pOutVTXFmt0->rta_index |
202 pOutVTXFmt0->kill_flag |
203 pOutVTXFmt0->viewport_index);
204 }
205
206 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
207 {
208 return (pFPOutFmt->depth |
209 pFPOutFmt->stencil_ref |
210 pFPOutFmt->mask |
211 pFPOutFmt->coverage_to_mask);
212 }
213
214 GLboolean is_reduction_opcode(PVSDWORD* dest)
215 {
216 if (dest->dst.op3 == 0)
217 {
218 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
219 {
220 return GL_TRUE;
221 }
222 }
223 return GL_FALSE;
224 }
225
226 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
227 {
228 GLuint format = FMT_INVALID;
229 GLuint uiElemSize = 0;
230
231 switch (eType)
232 {
233 case GL_BYTE:
234 case GL_UNSIGNED_BYTE:
235 uiElemSize = 1;
236 switch(nChannels)
237 {
238 case 1:
239 format = FMT_8; break;
240 case 2:
241 format = FMT_8_8; break;
242 case 3:
243 format = FMT_8_8_8; break;
244 case 4:
245 format = FMT_8_8_8_8; break;
246 default:
247 break;
248 }
249 break;
250
251 case GL_UNSIGNED_SHORT:
252 case GL_SHORT:
253 uiElemSize = 2;
254 switch(nChannels)
255 {
256 case 1:
257 format = FMT_16; break;
258 case 2:
259 format = FMT_16_16; break;
260 case 3:
261 format = FMT_16_16_16; break;
262 case 4:
263 format = FMT_16_16_16_16; break;
264 default:
265 break;
266 }
267 break;
268
269 case GL_UNSIGNED_INT:
270 case GL_INT:
271 uiElemSize = 4;
272 switch(nChannels)
273 {
274 case 1:
275 format = FMT_32; break;
276 case 2:
277 format = FMT_32_32; break;
278 case 3:
279 format = FMT_32_32_32; break;
280 case 4:
281 format = FMT_32_32_32_32; break;
282 default:
283 break;
284 }
285 break;
286
287 case GL_FLOAT:
288 uiElemSize = 4;
289 switch(nChannels)
290 {
291 case 1:
292 format = FMT_32_FLOAT; break;
293 case 2:
294 format = FMT_32_32_FLOAT; break;
295 case 3:
296 format = FMT_32_32_32_FLOAT; break;
297 case 4:
298 format = FMT_32_32_32_32_FLOAT; break;
299 default:
300 break;
301 }
302 break;
303 case GL_DOUBLE:
304 uiElemSize = 8;
305 switch(nChannels)
306 {
307 case 1:
308 format = FMT_32_FLOAT; break;
309 case 2:
310 format = FMT_32_32_FLOAT; break;
311 case 3:
312 format = FMT_32_32_32_FLOAT; break;
313 case 4:
314 format = FMT_32_32_32_32_FLOAT; break;
315 default:
316 break;
317 }
318 break;
319 default:
320 ;
321 //GL_ASSERT_NO_CASE();
322 }
323
324 if(NULL != pClient_size)
325 {
326 *pClient_size = uiElemSize * nChannels;
327 }
328
329 return(format);
330 }
331
332 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
333 {
334 if(pAsm->D.dst.op3)
335 {
336 return 3;
337 }
338
339 switch (pAsm->D.dst.opcode)
340 {
341 case SQ_OP2_INST_ADD:
342 case SQ_OP2_INST_KILLGT:
343 case SQ_OP2_INST_MUL:
344 case SQ_OP2_INST_MAX:
345 case SQ_OP2_INST_MIN:
346 //case SQ_OP2_INST_MAX_DX10:
347 //case SQ_OP2_INST_MIN_DX10:
348 case SQ_OP2_INST_SETE:
349 case SQ_OP2_INST_SETNE:
350 case SQ_OP2_INST_SETGT:
351 case SQ_OP2_INST_SETGE:
352 case SQ_OP2_INST_PRED_SETE:
353 case SQ_OP2_INST_PRED_SETGT:
354 case SQ_OP2_INST_PRED_SETGE:
355 case SQ_OP2_INST_PRED_SETNE:
356 case SQ_OP2_INST_DOT4:
357 case SQ_OP2_INST_DOT4_IEEE:
358 case SQ_OP2_INST_CUBE:
359 return 2;
360
361 case SQ_OP2_INST_MOV:
362 case SQ_OP2_INST_MOVA_FLOOR:
363 case SQ_OP2_INST_FRACT:
364 case SQ_OP2_INST_FLOOR:
365 case SQ_OP2_INST_EXP_IEEE:
366 case SQ_OP2_INST_LOG_CLAMPED:
367 case SQ_OP2_INST_LOG_IEEE:
368 case SQ_OP2_INST_RECIP_IEEE:
369 case SQ_OP2_INST_RECIPSQRT_IEEE:
370 case SQ_OP2_INST_FLT_TO_INT:
371 case SQ_OP2_INST_SIN:
372 case SQ_OP2_INST_COS:
373 return 1;
374
375 default: radeon_error(
376 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
377 };
378
379 return 3;
380 }
381
382 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
383 {
384 GLuint i;
385
386 Init_R700_Shader(pShader);
387 pAsm->pR700Shader = pShader;
388 pAsm->currentShaderType = spt;
389
390 pAsm->cf_last_export_ptr = NULL;
391
392 pAsm->cf_current_export_clause_ptr = NULL;
393 pAsm->cf_current_alu_clause_ptr = NULL;
394 pAsm->cf_current_tex_clause_ptr = NULL;
395 pAsm->cf_current_vtx_clause_ptr = NULL;
396 pAsm->cf_current_cf_clause_ptr = NULL;
397
398 // No clause has been created yet
399 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
400
401 pAsm->number_of_colorandz_exports = 0;
402 pAsm->number_of_exports = 0;
403 pAsm->number_of_export_opcodes = 0;
404
405 pAsm->alu_x_opcode = 0;
406
407 pAsm->D2.bits = 0;
408
409 pAsm->D.bits = 0;
410 pAsm->S[0].bits = 0;
411 pAsm->S[1].bits = 0;
412 pAsm->S[2].bits = 0;
413
414 pAsm->uLastPosUpdate = 0;
415
416 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
417
418 pAsm->uIIns = 0;
419 pAsm->uOIns = 0;
420 pAsm->number_used_registers = 0;
421 pAsm->uUsedConsts = 256;
422
423
424 // Fragment programs
425 pAsm->uBoolConsts = 0;
426 pAsm->uIntConsts = 0;
427 pAsm->uInsts = 0;
428 pAsm->uConsts = 0;
429
430 pAsm->FCSP = 0;
431 pAsm->fc_stack[0].type = FC_NONE;
432
433 pAsm->branch_depth = 0;
434 pAsm->max_branch_depth = 0;
435
436 pAsm->aArgSubst[0] =
437 pAsm->aArgSubst[1] =
438 pAsm->aArgSubst[2] =
439 pAsm->aArgSubst[3] = (-1);
440
441 pAsm->uOutputs = 0;
442
443 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
444 {
445 pAsm->color_export_register_number[i] = (-1);
446 }
447
448
449 pAsm->depth_export_register_number = (-1);
450 pAsm->stencil_export_register_number = (-1);
451 pAsm->coverage_to_mask_export_register_number = (-1);
452 pAsm->mask_export_register_number = (-1);
453
454 pAsm->starting_export_register_number = 0;
455 pAsm->starting_vfetch_register_number = 0;
456 pAsm->starting_temp_register_number = 0;
457 pAsm->uFirstHelpReg = 0;
458
459
460 pAsm->input_position_is_used = GL_FALSE;
461 pAsm->input_normal_is_used = GL_FALSE;
462
463
464 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
465 {
466 pAsm->input_color_is_used[ i ] = GL_FALSE;
467 }
468
469 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
470 {
471 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
472 }
473
474 for (i=0; i<VERT_ATTRIB_MAX; i++)
475 {
476 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
477 }
478
479 pAsm->number_of_inputs = 0;
480
481 pAsm->is_tex = GL_FALSE;
482 pAsm->need_tex_barrier = GL_FALSE;
483
484 pAsm->subs = NULL;
485 pAsm->unSubArraySize = 0;
486 pAsm->unSubArrayPointer = 0;
487 pAsm->callers = NULL;
488 pAsm->unCallerArraySize = 0;
489 pAsm->unCallerArrayPointer = 0;
490
491 pAsm->CALLSP = 0;
492 pAsm->CALLSTACK[0].FCSP_BeforeEntry;
493 pAsm->CALLSTACK[0].plstCFInstructions_local
494 = &(pAsm->pR700Shader->lstCFInstructions);
495
496 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
497
498 pAsm->unCFflags = 0;
499
500 return 0;
501 }
502
503 GLboolean IsTex(gl_inst_opcode Opcode)
504 {
505 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
506 {
507 return GL_TRUE;
508 }
509 return GL_FALSE;
510 }
511
512 GLboolean IsAlu(gl_inst_opcode Opcode)
513 {
514 //TODO : more for fc and ex for higher spec.
515 if( IsTex(Opcode) )
516 {
517 return GL_FALSE;
518 }
519 return GL_TRUE;
520 }
521
522 int check_current_clause(r700_AssemblerBase* pAsm,
523 CF_CLAUSE_TYPE new_clause_type)
524 {
525 if (pAsm->cf_current_clause_type != new_clause_type)
526 { //Close last open clause
527 switch (pAsm->cf_current_clause_type)
528 {
529 case CF_ALU_CLAUSE:
530 if ( pAsm->cf_current_alu_clause_ptr != NULL)
531 {
532 pAsm->cf_current_alu_clause_ptr = NULL;
533 }
534 break;
535 case CF_VTX_CLAUSE:
536 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
537 {
538 pAsm->cf_current_vtx_clause_ptr = NULL;
539 }
540 break;
541 case CF_TEX_CLAUSE:
542 if ( pAsm->cf_current_tex_clause_ptr != NULL)
543 {
544 pAsm->cf_current_tex_clause_ptr = NULL;
545 }
546 break;
547 case CF_EXPORT_CLAUSE:
548 if ( pAsm->cf_current_export_clause_ptr != NULL)
549 {
550 pAsm->cf_current_export_clause_ptr = NULL;
551 }
552 break;
553 case CF_OTHER_CLAUSE:
554 if ( pAsm->cf_current_cf_clause_ptr != NULL)
555 {
556 pAsm->cf_current_cf_clause_ptr = NULL;
557 }
558 break;
559 case CF_EMPTY_CLAUSE:
560 break;
561 default:
562 radeon_error(
563 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
564 return GL_FALSE;
565 }
566
567 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
568
569 // Create new clause
570 switch (new_clause_type)
571 {
572 case CF_ALU_CLAUSE:
573 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
574 break;
575 case CF_VTX_CLAUSE:
576 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
577 break;
578 case CF_TEX_CLAUSE:
579 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
580 break;
581 case CF_EXPORT_CLAUSE:
582 {
583 R700ControlFlowSXClause* pR700ControlFlowSXClause
584 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
585
586 // Add new export instruction to control flow program
587 if (pR700ControlFlowSXClause != 0)
588 {
589 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
590 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
591 AddCFInstruction( pAsm->pR700Shader,
592 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
593 }
594 else
595 {
596 radeon_error(
597 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
598 return GL_FALSE;
599 }
600 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
601 }
602 break;
603 case CF_EMPTY_CLAUSE:
604 break;
605 case CF_OTHER_CLAUSE:
606 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
607 break;
608 default:
609 radeon_error(
610 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
611 return GL_FALSE;
612 }
613 }
614
615 return GL_TRUE;
616 }
617
618 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
619 {
620 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
621 {
622 return GL_FALSE;
623 }
624
625 pAsm->cf_current_cf_clause_ptr =
626 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
627
628 if (pAsm->cf_current_cf_clause_ptr != NULL)
629 {
630 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
631 AddCFInstruction( pAsm->pR700Shader,
632 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
633 }
634 else
635 {
636 radeon_error("Could not allocate a new VFetch CF instruction.\n");
637 return GL_FALSE;
638 }
639
640 return GL_TRUE;
641 }
642
643 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
644 R700VertexInstruction* vertex_instruction_ptr)
645 {
646 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
647 {
648 return GL_FALSE;
649 }
650
651 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
652 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
653 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
654 ) )
655 {
656 // Create new Vfetch control flow instruction for this new clause
657 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
658
659 if (pAsm->cf_current_vtx_clause_ptr != NULL)
660 {
661 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
662 AddCFInstruction( pAsm->pR700Shader,
663 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
664 }
665 else
666 {
667 radeon_error("Could not allocate a new VFetch CF instruction.\n");
668 return GL_FALSE;
669 }
670
671 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
672 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
673 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
674 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
675 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
676 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
677 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
678 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
679 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
680
681 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
682 }
683 else
684 {
685 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
686 }
687
688 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
689
690 return GL_TRUE;
691 }
692
693 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
694 R700TextureInstruction* tex_instruction_ptr)
695 {
696 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
697 {
698 return GL_FALSE;
699 }
700
701 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
702 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
703 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
704 ) )
705 {
706 // new tex cf instruction for this new clause
707 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
708
709 if (pAsm->cf_current_tex_clause_ptr != NULL)
710 {
711 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
712 AddCFInstruction( pAsm->pR700Shader,
713 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
714 }
715 else
716 {
717 radeon_error("Could not allocate a new TEX CF instruction.\n");
718 return GL_FALSE;
719 }
720
721 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
722 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
723 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
724
725 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
726 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
727 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
728 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
729 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
730 }
731 else
732 {
733 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
734 }
735
736 // If this clause constains any TEX instruction that is dependent on a previous instruction,
737 // set the barrier bit
738 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
739 {
740 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
741 }
742
743 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
744 {
745 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
746 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
747 }
748
749 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
750
751 return GL_TRUE;
752 }
753
754 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
755 GLuint gl_client_id,
756 GLuint destination_register,
757 GLuint number_of_elements,
758 GLenum dataElementType,
759 VTX_FETCH_METHOD* pFetchMethod)
760 {
761 GLuint client_size_inbyte;
762 GLuint data_format;
763 GLuint mega_fetch_count;
764 GLuint is_mega_fetch_flag;
765
766 R700VertexGenericFetch* vfetch_instruction_ptr;
767 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
768
769 if (assembled_vfetch_instruction_ptr == NULL)
770 {
771 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
772 if (vfetch_instruction_ptr == NULL)
773 {
774 return GL_FALSE;
775 }
776 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
777 }
778 else
779 {
780 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
781 }
782
783 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
784
785 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
786 {
787 //TODO : mini fetch
788 }
789 else
790 {
791 mega_fetch_count = MEGA_FETCH_BYTES - 1;
792 is_mega_fetch_flag = 0x1;
793 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
794 }
795
796 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
797 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
798 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
799
800 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
801 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
802 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
803 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
804 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
805
806 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
807 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
808 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
809 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
810
811 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
812
813 // Destination register
814 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
815 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
816
817 vfetch_instruction_ptr->m_Word2.f.offset = 0;
818 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
819
820 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
821
822 if (assembled_vfetch_instruction_ptr == NULL)
823 {
824 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
825 {
826 return GL_FALSE;
827 }
828
829 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
830 {
831 return GL_FALSE;
832 }
833 else
834 {
835 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
836 }
837 }
838
839 return GL_TRUE;
840 }
841
842 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
843 GLuint destination_register,
844 GLenum type,
845 GLint size,
846 GLubyte element,
847 GLuint _signed,
848 GLboolean normalize,
849 VTX_FETCH_METHOD * pFetchMethod)
850 {
851 GLuint client_size_inbyte;
852 GLuint data_format;
853 GLuint mega_fetch_count;
854 GLuint is_mega_fetch_flag;
855
856 R700VertexGenericFetch* vfetch_instruction_ptr;
857 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
858 = pAsm->vfetch_instruction_ptr_array[element];
859
860 if (assembled_vfetch_instruction_ptr == NULL)
861 {
862 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
863 if (vfetch_instruction_ptr == NULL)
864 {
865 return GL_FALSE;
866 }
867 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
868 }
869 else
870 {
871 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
872 }
873
874 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
875
876 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
877 {
878 //TODO : mini fetch
879 }
880 else
881 {
882 mega_fetch_count = MEGA_FETCH_BYTES - 1;
883 is_mega_fetch_flag = 0x1;
884 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
885 }
886
887 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
888 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
889 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
890
891 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
892 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
893 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
894 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
895 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
896
897 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
898 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
899 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
900 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
901
902 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
903 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
904 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
905
906 if(1 == _signed)
907 {
908 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
909 }
910 else
911 {
912 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
913 }
914
915 if(GL_TRUE == normalize)
916 {
917 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
918 }
919 else
920 {
921 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
922 }
923
924 // Destination register
925 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
926 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
927
928 vfetch_instruction_ptr->m_Word2.f.offset = 0;
929 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
930
931 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
932
933 if (assembled_vfetch_instruction_ptr == NULL)
934 {
935 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
936 {
937 return GL_FALSE;
938 }
939
940 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
941 {
942 return GL_FALSE;
943 }
944 else
945 {
946 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
947 }
948 }
949
950 return GL_TRUE;
951 }
952
953 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
954 {
955 GLint i;
956 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
957 pAsm->cf_current_vtx_clause_ptr = NULL;
958
959 for (i=0; i<VERT_ATTRIB_MAX; i++)
960 {
961 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
962 }
963
964 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
965
966 return GL_TRUE;
967 }
968
969 GLuint gethelpr(r700_AssemblerBase* pAsm)
970 {
971 GLuint r = pAsm->uHelpReg;
972 pAsm->uHelpReg++;
973 if (pAsm->uHelpReg > pAsm->number_used_registers)
974 {
975 pAsm->number_used_registers = pAsm->uHelpReg;
976 }
977 return r;
978 }
979 void resethelpr(r700_AssemblerBase* pAsm)
980 {
981 pAsm->uHelpReg = pAsm->uFirstHelpReg;
982 }
983
984 void checkop_init(r700_AssemblerBase* pAsm)
985 {
986 resethelpr(pAsm);
987 pAsm->aArgSubst[0] =
988 pAsm->aArgSubst[1] =
989 pAsm->aArgSubst[2] =
990 pAsm->aArgSubst[3] = -1;
991 }
992
993 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
994 {
995 GLuint tmp = gethelpr(pAsm);
996
997 //mov src to temp helper gpr.
998 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
999
1000 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1001
1002 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1003 pAsm->D.dst.reg = tmp;
1004
1005 nomask_PVSDST(&(pAsm->D.dst));
1006
1007 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1008 {
1009 return GL_FALSE;
1010 }
1011
1012 noswizzle_PVSSRC(&(pAsm->S[0].src));
1013 noneg_PVSSRC(&(pAsm->S[0].src));
1014
1015 if( GL_FALSE == next_ins(pAsm) )
1016 {
1017 return GL_FALSE;
1018 }
1019
1020 pAsm->aArgSubst[1 + src] = tmp;
1021
1022 return GL_TRUE;
1023 }
1024
1025 GLboolean checkop1(r700_AssemblerBase* pAsm)
1026 {
1027 checkop_init(pAsm);
1028 return GL_TRUE;
1029 }
1030
1031 GLboolean checkop2(r700_AssemblerBase* pAsm)
1032 {
1033 GLboolean bSrcConst[2];
1034 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1035
1036 checkop_init(pAsm);
1037
1038 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1039 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1040 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1041 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1042 {
1043 bSrcConst[0] = GL_TRUE;
1044 }
1045 else
1046 {
1047 bSrcConst[0] = GL_FALSE;
1048 }
1049 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1050 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1051 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1052 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1053 {
1054 bSrcConst[1] = GL_TRUE;
1055 }
1056 else
1057 {
1058 bSrcConst[1] = GL_FALSE;
1059 }
1060
1061 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1062 {
1063 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1064 {
1065 if( GL_FALSE == mov_temp(pAsm, 1) )
1066 {
1067 return GL_FALSE;
1068 }
1069 }
1070 }
1071
1072 return GL_TRUE;
1073 }
1074
1075 GLboolean checkop3(r700_AssemblerBase* pAsm)
1076 {
1077 GLboolean bSrcConst[3];
1078 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1079
1080 checkop_init(pAsm);
1081
1082 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1083 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1084 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1085 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1086 {
1087 bSrcConst[0] = GL_TRUE;
1088 }
1089 else
1090 {
1091 bSrcConst[0] = GL_FALSE;
1092 }
1093 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1094 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1095 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1096 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1097 {
1098 bSrcConst[1] = GL_TRUE;
1099 }
1100 else
1101 {
1102 bSrcConst[1] = GL_FALSE;
1103 }
1104 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1105 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1106 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1107 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1108 {
1109 bSrcConst[2] = GL_TRUE;
1110 }
1111 else
1112 {
1113 bSrcConst[2] = GL_FALSE;
1114 }
1115
1116 if( (GL_TRUE == bSrcConst[0]) &&
1117 (GL_TRUE == bSrcConst[1]) &&
1118 (GL_TRUE == bSrcConst[2]) )
1119 {
1120 if( GL_FALSE == mov_temp(pAsm, 1) )
1121 {
1122 return GL_FALSE;
1123 }
1124 if( GL_FALSE == mov_temp(pAsm, 2) )
1125 {
1126 return GL_FALSE;
1127 }
1128
1129 return GL_TRUE;
1130 }
1131 else if( (GL_TRUE == bSrcConst[0]) &&
1132 (GL_TRUE == bSrcConst[1]) )
1133 {
1134 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1135 {
1136 if( GL_FALSE == mov_temp(pAsm, 1) )
1137 {
1138 return 1;
1139 }
1140 }
1141
1142 return GL_TRUE;
1143 }
1144 else if ( (GL_TRUE == bSrcConst[0]) &&
1145 (GL_TRUE == bSrcConst[2]) )
1146 {
1147 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1148 {
1149 if( GL_FALSE == mov_temp(pAsm, 2) )
1150 {
1151 return GL_FALSE;
1152 }
1153 }
1154
1155 return GL_TRUE;
1156 }
1157 else if( (GL_TRUE == bSrcConst[1]) &&
1158 (GL_TRUE == bSrcConst[2]) )
1159 {
1160 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1161 {
1162 if( GL_FALSE == mov_temp(pAsm, 2) )
1163 {
1164 return GL_FALSE;
1165 }
1166 }
1167
1168 return GL_TRUE;
1169 }
1170
1171 return GL_TRUE;
1172 }
1173
1174 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1175 int src,
1176 int fld)
1177 {
1178 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1179
1180 if (fld == -1)
1181 {
1182 fld = src;
1183 }
1184
1185 if(pAsm->aArgSubst[1+src] >= 0)
1186 {
1187 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1188 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1189 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1190 }
1191 else
1192 {
1193 switch (pILInst->SrcReg[src].File)
1194 {
1195 case PROGRAM_TEMPORARY:
1196 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1197 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1198 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1199 break;
1200 case PROGRAM_CONSTANT:
1201 case PROGRAM_LOCAL_PARAM:
1202 case PROGRAM_ENV_PARAM:
1203 case PROGRAM_STATE_VAR:
1204 case PROGRAM_UNIFORM:
1205 if (1 == pILInst->SrcReg[src].RelAddr)
1206 {
1207 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1208 }
1209 else
1210 {
1211 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1212 }
1213
1214 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1215 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1216 break;
1217 case PROGRAM_INPUT:
1218 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1219 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1220 switch (pAsm->currentShaderType)
1221 {
1222 case SPT_FP:
1223 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1224 break;
1225 case SPT_VP:
1226 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1227 break;
1228 }
1229 break;
1230 default:
1231 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1232 return GL_FALSE;
1233 }
1234 }
1235
1236 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1237 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1238 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1239 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1240
1241 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1242 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1243 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1244 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1245
1246 return GL_TRUE;
1247 }
1248
1249 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1250 {
1251 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1252 switch (pILInst->DstReg.File)
1253 {
1254 case PROGRAM_TEMPORARY:
1255 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1256 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1257 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1258 break;
1259 case PROGRAM_ADDRESS:
1260 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1261 pAsm->D.dst.rtype = DST_REG_A0;
1262 pAsm->D.dst.reg = 0;
1263 break;
1264 case PROGRAM_OUTPUT:
1265 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1266 pAsm->D.dst.rtype = DST_REG_OUT;
1267 switch (pAsm->currentShaderType)
1268 {
1269 case SPT_FP:
1270 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1271 break;
1272 case SPT_VP:
1273 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1274 break;
1275 }
1276 break;
1277 default:
1278 radeon_error("Invalid destination output argument type\n");
1279 return GL_FALSE;
1280 }
1281
1282 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1283 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1284 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1285 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1286
1287 return GL_TRUE;
1288 }
1289
1290 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1291 {
1292 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1293
1294 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1295 {
1296 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1297 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1298
1299 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1300 }
1301 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1302 {
1303 pAsm->D.dst.rtype = DST_REG_OUT;
1304 switch (pAsm->currentShaderType)
1305 {
1306 case SPT_FP:
1307 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1308 break;
1309 case SPT_VP:
1310 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1311 break;
1312 }
1313
1314 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1315 }
1316 else
1317 {
1318 radeon_error("Invalid destination output argument type\n");
1319 return GL_FALSE;
1320 }
1321
1322 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1323 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1324 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1325 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1326
1327 return GL_TRUE;
1328 }
1329
1330 GLboolean tex_src(r700_AssemblerBase *pAsm)
1331 {
1332 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1333
1334 GLboolean bValidTexCoord = GL_FALSE;
1335
1336 if(pAsm->aArgSubst[1] >= 0)
1337 {
1338 bValidTexCoord = GL_TRUE;
1339 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1340 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1341 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1342 }
1343 else
1344 {
1345 switch (pILInst->SrcReg[0].File) {
1346 case PROGRAM_CONSTANT:
1347 case PROGRAM_LOCAL_PARAM:
1348 case PROGRAM_ENV_PARAM:
1349 case PROGRAM_STATE_VAR:
1350 break;
1351 case PROGRAM_TEMPORARY:
1352 bValidTexCoord = GL_TRUE;
1353 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1354 pAsm->starting_temp_register_number;
1355 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1356 break;
1357 case PROGRAM_INPUT:
1358 switch (pILInst->SrcReg[0].Index)
1359 {
1360 case FRAG_ATTRIB_WPOS:
1361 case FRAG_ATTRIB_COL0:
1362 case FRAG_ATTRIB_COL1:
1363 case FRAG_ATTRIB_FOGC:
1364 case FRAG_ATTRIB_TEX0:
1365 case FRAG_ATTRIB_TEX1:
1366 case FRAG_ATTRIB_TEX2:
1367 case FRAG_ATTRIB_TEX3:
1368 case FRAG_ATTRIB_TEX4:
1369 case FRAG_ATTRIB_TEX5:
1370 case FRAG_ATTRIB_TEX6:
1371 case FRAG_ATTRIB_TEX7:
1372 bValidTexCoord = GL_TRUE;
1373 pAsm->S[0].src.reg =
1374 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1375 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1376 break;
1377 case FRAG_ATTRIB_FACE:
1378 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1379 break;
1380 case FRAG_ATTRIB_PNTC:
1381 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1382 break;
1383 case FRAG_ATTRIB_VAR0:
1384 fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
1385 break;
1386 }
1387
1388 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1389 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1390 {
1391 bValidTexCoord = GL_TRUE;
1392 pAsm->S[0].src.reg =
1393 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1394 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1395 }
1396
1397 break;
1398 }
1399 }
1400
1401 if(GL_TRUE == bValidTexCoord)
1402 {
1403 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1404 }
1405 else
1406 {
1407 radeon_error("Invalid source texcoord for TEX instruction\n");
1408 return GL_FALSE;
1409 }
1410
1411 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1412 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1413 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1414 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1415
1416 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1417 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1418 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1419 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1420
1421 return GL_TRUE;
1422 }
1423
1424 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1425 {
1426 PVSSRC * texture_coordinate_source;
1427 PVSSRC * texture_unit_source;
1428
1429 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1430 if (tex_instruction_ptr == NULL)
1431 {
1432 return GL_FALSE;
1433 }
1434 Init_R700TextureInstruction(tex_instruction_ptr);
1435
1436 texture_coordinate_source = &(pAsm->S[0].src);
1437 texture_unit_source = &(pAsm->S[1].src);
1438
1439 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1440 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1441 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1442
1443 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1444
1445 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1446 if (normalized) {
1447 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1448 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1449 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1450 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1451 } else {
1452 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1453 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1454 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1455 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1456 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1457 }
1458
1459 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1460 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1461 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1462
1463 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1464
1465 // dst
1466 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1467 (pAsm->D.dst.rtype == DST_REG_OUT) )
1468 {
1469 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1470 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1471
1472 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1473 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1474
1475 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1476 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1477 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1478 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1479
1480
1481 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1482 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1483 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1484 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1485 }
1486 else
1487 {
1488 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1489 return GL_FALSE;
1490 }
1491
1492 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1493 {
1494 return GL_FALSE;
1495 }
1496
1497 return GL_TRUE;
1498 }
1499
1500 void initialize(r700_AssemblerBase *pAsm)
1501 {
1502 GLuint cycle, component;
1503
1504 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1505 {
1506 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1507 {
1508 pAsm->hw_gpr[cycle][component] = (-1);
1509 }
1510 }
1511 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1512 {
1513 pAsm->hw_cfile_addr[component] = (-1);
1514 pAsm->hw_cfile_chan[component] = (-1);
1515 }
1516 }
1517
1518 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1519 int source_index,
1520 PVSSRC* pSource,
1521 BITS scalar_channel_index)
1522 {
1523 BITS src_sel;
1524 BITS src_rel;
1525 BITS src_chan;
1526 BITS src_neg;
1527
1528 //--------------------------------------------------------------------------
1529 // Source for operands src0, src1.
1530 // Values [0,127] correspond to GPR[0..127].
1531 // Values [256,511] correspond to cfile constants c[0..255].
1532
1533 //--------------------------------------------------------------------------
1534 // Other special values are shown in the list below.
1535
1536 // 248 SQ_ALU_SRC_0: special constant 0.0.
1537 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1538
1539 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1540 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1541
1542 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1543 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1544
1545 // 254 SQ_ALU_SRC_PV: previous vector result.
1546 // 255 SQ_ALU_SRC_PS: previous scalar result.
1547 //--------------------------------------------------------------------------
1548
1549 BITS channel_swizzle;
1550 switch (scalar_channel_index)
1551 {
1552 case 0: channel_swizzle = pSource->swizzlex; break;
1553 case 1: channel_swizzle = pSource->swizzley; break;
1554 case 2: channel_swizzle = pSource->swizzlez; break;
1555 case 3: channel_swizzle = pSource->swizzlew; break;
1556 default: channel_swizzle = SQ_SEL_MASK; break;
1557 }
1558
1559 if(channel_swizzle == SQ_SEL_0)
1560 {
1561 src_sel = SQ_ALU_SRC_0;
1562 }
1563 else if (channel_swizzle == SQ_SEL_1)
1564 {
1565 src_sel = SQ_ALU_SRC_1;
1566 }
1567 else
1568 {
1569 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1570 (pSource->rtype == SRC_REG_INPUT)
1571 )
1572 {
1573 src_sel = pSource->reg;
1574 }
1575 else if (pSource->rtype == SRC_REG_CONSTANT)
1576 {
1577 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1578 }
1579 else if (pSource->rtype == SRC_REC_LITERAL)
1580 {
1581 src_sel = SQ_ALU_SRC_LITERAL;
1582 }
1583 else
1584 {
1585 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1586 source_index, pSource->rtype);
1587 return GL_FALSE;
1588 }
1589 }
1590
1591 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1592 {
1593 src_rel = SQ_ABSOLUTE;
1594 }
1595 else
1596 {
1597 src_rel = SQ_RELATIVE;
1598 }
1599
1600 switch (channel_swizzle)
1601 {
1602 case SQ_SEL_X:
1603 src_chan = SQ_CHAN_X;
1604 break;
1605 case SQ_SEL_Y:
1606 src_chan = SQ_CHAN_Y;
1607 break;
1608 case SQ_SEL_Z:
1609 src_chan = SQ_CHAN_Z;
1610 break;
1611 case SQ_SEL_W:
1612 src_chan = SQ_CHAN_W;
1613 break;
1614 case SQ_SEL_0:
1615 case SQ_SEL_1:
1616 // Does not matter since src_sel controls
1617 src_chan = SQ_CHAN_X;
1618 break;
1619 default:
1620 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1621 return GL_FALSE;
1622 break;
1623 }
1624
1625 switch (scalar_channel_index)
1626 {
1627 case 0: src_neg = pSource->negx; break;
1628 case 1: src_neg = pSource->negy; break;
1629 case 2: src_neg = pSource->negz; break;
1630 case 3: src_neg = pSource->negw; break;
1631 default: src_neg = 0; break;
1632 }
1633
1634 switch (source_index)
1635 {
1636 case 0:
1637 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1638 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1639 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1640 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1641 break;
1642 case 1:
1643 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1644 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1645 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1646 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1647 break;
1648 case 2:
1649 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1650 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1651 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1652 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1653 break;
1654 default:
1655 radeon_error("Only three sources allowed in ALU opcodes.\n");
1656 return GL_FALSE;
1657 break;
1658 }
1659
1660 return GL_TRUE;
1661 }
1662
1663 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1664 R700ALUInstruction* alu_instruction_ptr,
1665 GLuint contiguous_slots_needed)
1666 {
1667 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1668 {
1669 return GL_FALSE;
1670 }
1671
1672 if ( pAsm->alu_x_opcode != 0 ||
1673 pAsm->cf_current_alu_clause_ptr == NULL ||
1674 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1675 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1676 ) )
1677 {
1678
1679 //new cf inst for this clause
1680 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1681
1682 // link the new cf to cf segment
1683 if(NULL != pAsm->cf_current_alu_clause_ptr)
1684 {
1685 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1686 AddCFInstruction( pAsm->pR700Shader,
1687 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1688 }
1689 else
1690 {
1691 radeon_error("Could not allocate a new ALU CF instruction.\n");
1692 return GL_FALSE;
1693 }
1694
1695 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1696 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1697 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1698
1699 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1700 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1701 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1702
1703 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1704
1705 if(pAsm->alu_x_opcode != 0)
1706 {
1707 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1708 pAsm->alu_x_opcode = 0;
1709 }
1710 else
1711 {
1712 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1713 }
1714
1715 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1716
1717 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1718 }
1719 else
1720 {
1721 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1722 }
1723
1724 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1725 // set the whole_quad_mode for this clause
1726 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1727 {
1728 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1729 }
1730
1731 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1732 {
1733 alu_instruction_ptr->m_Word0.f.last = 1;
1734 }
1735
1736 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1737 {
1738 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1739 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1740 }
1741
1742 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1743
1744 return GL_TRUE;
1745 }
1746
1747 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1748 int source_index,
1749 BITS* psrc_sel,
1750 BITS* psrc_rel,
1751 BITS* psrc_chan,
1752 BITS* psrc_neg)
1753 {
1754 switch (source_index)
1755 {
1756 case 0:
1757 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1758 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1759 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1760 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1761 break;
1762
1763 case 1:
1764 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1765 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1766 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1767 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1768 break;
1769
1770 case 2:
1771 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1772 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1773 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1774 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1775 break;
1776 }
1777 }
1778
1779 int is_cfile(BITS sel)
1780 {
1781 if (sel > 255 && sel < 512)
1782 {
1783 return 1;
1784 }
1785 return 0;
1786 }
1787
1788 int is_const(BITS sel)
1789 {
1790 if (is_cfile(sel))
1791 {
1792 return 1;
1793 }
1794 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1795 {
1796 return 1;
1797 }
1798 return 0;
1799 }
1800
1801 int is_gpr(BITS sel)
1802 {
1803 if (sel >= 0 && sel < 128)
1804 {
1805 return 1;
1806 }
1807 return 0;
1808 }
1809
1810 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1811 SQ_ALU_VEC_120, //001
1812 SQ_ALU_VEC_102, //010
1813
1814 SQ_ALU_VEC_201, //011
1815 SQ_ALU_VEC_012, //100
1816 SQ_ALU_VEC_021, //101
1817
1818 SQ_ALU_VEC_012, //110
1819 SQ_ALU_VEC_012}; //111
1820
1821 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1822 SQ_ALU_SCL_122, //001
1823 SQ_ALU_SCL_122, //010
1824
1825 SQ_ALU_SCL_221, //011
1826 SQ_ALU_SCL_212, //100
1827 SQ_ALU_SCL_122, //101
1828
1829 SQ_ALU_SCL_122, //110
1830 SQ_ALU_SCL_122}; //111
1831
1832 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1833 GLuint sel,
1834 GLuint chan)
1835 {
1836 int res_match = (-1);
1837 int res_empty = (-1);
1838
1839 GLint res;
1840
1841 for (res=3; res>=0; res--)
1842 {
1843 if(pAsm->hw_cfile_addr[ res] < 0)
1844 {
1845 res_empty = res;
1846 }
1847 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1848 &&
1849 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1850 {
1851 res_match = res;
1852 }
1853 }
1854
1855 if(res_match >= 0)
1856 {
1857 // Read for this scalar component already reserved, nothing to do here.
1858 ;
1859 }
1860 else if(res_empty >= 0)
1861 {
1862 pAsm->hw_cfile_addr[ res_empty ] = sel;
1863 pAsm->hw_cfile_chan[ res_empty ] = chan;
1864 }
1865 else
1866 {
1867 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1868 return GL_FALSE;
1869 }
1870 return GL_TRUE;
1871 }
1872
1873 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1874 {
1875 if(pAsm->hw_gpr[cycle][chan] < 0)
1876 {
1877 pAsm->hw_gpr[cycle][chan] = sel;
1878 }
1879 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1880 {
1881 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1882 return GL_FALSE;
1883 }
1884
1885 return GL_TRUE;
1886 }
1887
1888 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1889 {
1890 switch (swiz)
1891 {
1892 case SQ_ALU_SCL_210:
1893 {
1894 int table[3] = {2, 1, 0};
1895 *pCycle = table[sel];
1896 return GL_TRUE;
1897 }
1898 break;
1899 case SQ_ALU_SCL_122:
1900 {
1901 int table[3] = {1, 2, 2};
1902 *pCycle = table[sel];
1903 return GL_TRUE;
1904 }
1905 break;
1906 case SQ_ALU_SCL_212:
1907 {
1908 int table[3] = {2, 1, 2};
1909 *pCycle = table[sel];
1910 return GL_TRUE;
1911 }
1912 break;
1913 case SQ_ALU_SCL_221:
1914 {
1915 int table[3] = {2, 2, 1};
1916 *pCycle = table[sel];
1917 return GL_TRUE;
1918 }
1919 break;
1920 default:
1921 radeon_error("Bad Scalar bank swizzle value\n");
1922 break;
1923 }
1924
1925 return GL_FALSE;
1926 }
1927
1928 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1929 {
1930 switch (swiz)
1931 {
1932 case SQ_ALU_VEC_012:
1933 {
1934 int table[3] = {0, 1, 2};
1935 *pCycle = table[sel];
1936 }
1937 break;
1938 case SQ_ALU_VEC_021:
1939 {
1940 int table[3] = {0, 2, 1};
1941 *pCycle = table[sel];
1942 }
1943 break;
1944 case SQ_ALU_VEC_120:
1945 {
1946 int table[3] = {1, 2, 0};
1947 *pCycle = table[sel];
1948 }
1949 break;
1950 case SQ_ALU_VEC_102:
1951 {
1952 int table[3] = {1, 0, 2};
1953 *pCycle = table[sel];
1954 }
1955 break;
1956 case SQ_ALU_VEC_201:
1957 {
1958 int table[3] = {2, 0, 1};
1959 *pCycle = table[sel];
1960 }
1961 break;
1962 case SQ_ALU_VEC_210:
1963 {
1964 int table[3] = {2, 1, 0};
1965 *pCycle = table[sel];
1966 }
1967 break;
1968 default:
1969 radeon_error("Bad Vec bank swizzle value\n");
1970 return GL_FALSE;
1971 break;
1972 }
1973
1974 return GL_TRUE;
1975 }
1976
1977 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1978 R700ALUInstruction* alu_instruction_ptr)
1979 {
1980 GLuint cycle;
1981 GLuint bank_swizzle;
1982 GLuint const_count = 0;
1983
1984 BITS sel;
1985 BITS chan;
1986 BITS rel;
1987 BITS neg;
1988
1989 GLuint src;
1990
1991 BITS src_sel [3] = {0,0,0};
1992 BITS src_chan[3] = {0,0,0};
1993 BITS src_rel [3] = {0,0,0};
1994 BITS src_neg [3] = {0,0,0};
1995
1996 GLuint swizzle_key;
1997
1998 GLuint number_of_operands = r700GetNumOperands(pAsm);
1999
2000 for (src=0; src<number_of_operands; src++)
2001 {
2002 get_src_properties(alu_instruction_ptr,
2003 src,
2004 &(src_sel[src]),
2005 &(src_rel[src]),
2006 &(src_chan[src]),
2007 &(src_neg[src]) );
2008 }
2009
2010
2011 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2012 (is_const( src_sel[1] ) ? 2 : 0) +
2013 (is_const( src_sel[2] ) ? 1 : 0) );
2014
2015 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2016
2017 for (src=0; src<number_of_operands; src++)
2018 {
2019 sel = src_sel [src];
2020 chan = src_chan[src];
2021 rel = src_rel [src];
2022 neg = src_neg [src];
2023
2024 if (is_const( sel ))
2025 {
2026 // Any constant, including literal and inline constants
2027 const_count++;
2028
2029 if (is_cfile( sel ))
2030 {
2031 reserve_cfile(pAsm, sel, chan);
2032 }
2033
2034 }
2035 }
2036
2037 for (src=0; src<number_of_operands; src++)
2038 {
2039 sel = src_sel [src];
2040 chan = src_chan[src];
2041 rel = src_rel [src];
2042 neg = src_neg [src];
2043
2044 if( is_gpr(sel) )
2045 {
2046 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2047
2048 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2049 {
2050 return GL_FALSE;
2051 }
2052
2053 if(cycle < const_count)
2054 {
2055 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2056 {
2057 return GL_FALSE;
2058 }
2059 }
2060 }
2061 }
2062
2063 return GL_TRUE;
2064 }
2065
2066 GLboolean check_vector(r700_AssemblerBase* pAsm,
2067 R700ALUInstruction* alu_instruction_ptr)
2068 {
2069 GLuint cycle;
2070 GLuint bank_swizzle;
2071 GLuint const_count = 0;
2072
2073 GLuint src;
2074
2075 BITS sel;
2076 BITS chan;
2077 BITS rel;
2078 BITS neg;
2079
2080 BITS src_sel [3] = {0,0,0};
2081 BITS src_chan[3] = {0,0,0};
2082 BITS src_rel [3] = {0,0,0};
2083 BITS src_neg [3] = {0,0,0};
2084
2085 GLuint swizzle_key;
2086
2087 GLuint number_of_operands = r700GetNumOperands(pAsm);
2088
2089 for (src=0; src<number_of_operands; src++)
2090 {
2091 get_src_properties(alu_instruction_ptr,
2092 src,
2093 &(src_sel[src]),
2094 &(src_rel[src]),
2095 &(src_chan[src]),
2096 &(src_neg[src]) );
2097 }
2098
2099
2100 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2101 (is_const( src_sel[1] ) ? 2 : 0) +
2102 (is_const( src_sel[2] ) ? 1 : 0)
2103 );
2104
2105 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2106
2107 for (src=0; src<number_of_operands; src++)
2108 {
2109 sel = src_sel [src];
2110 chan = src_chan[src];
2111 rel = src_rel [src];
2112 neg = src_neg [src];
2113
2114
2115 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2116
2117 if( is_gpr(sel) )
2118 {
2119 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2120 {
2121 return GL_FALSE;
2122 }
2123
2124 if ( (src == 1) &&
2125 (sel == src_sel[0]) &&
2126 (chan == src_chan[0]) )
2127 {
2128 }
2129 else
2130 {
2131 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2132 {
2133 return GL_FALSE;
2134 }
2135 }
2136 }
2137 else if( is_const(sel) )
2138 {
2139 const_count++;
2140
2141 if( is_cfile(sel) )
2142 {
2143 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2144 {
2145 return GL_FALSE;
2146 }
2147 }
2148 }
2149 }
2150
2151 return GL_TRUE;
2152 }
2153
2154 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2155 {
2156 GLuint number_of_scalar_operations;
2157 GLboolean is_single_scalar_operation;
2158 GLuint scalar_channel_index;
2159
2160 PVSSRC * pcurrent_source;
2161 int current_source_index;
2162 GLuint contiguous_slots_needed;
2163
2164 GLuint uNumSrc = r700GetNumOperands(pAsm);
2165 //GLuint channel_swizzle, j;
2166 //GLuint chan_counter[4] = {0, 0, 0, 0};
2167 //PVSSRC * pSource[3];
2168 GLboolean bSplitInst = GL_FALSE;
2169
2170 if (1 == pAsm->D.dst.math)
2171 {
2172 is_single_scalar_operation = GL_TRUE;
2173 number_of_scalar_operations = 1;
2174 }
2175 else
2176 {
2177 is_single_scalar_operation = GL_FALSE;
2178 number_of_scalar_operations = 4;
2179
2180 /* current assembler doesn't do more than 1 register per source */
2181 #if 0
2182 /* check read port, only very preliminary algorithm, not count in
2183 src0/1 same comp case and prev slot repeat case; also not count relative
2184 addressing. TODO: improve performance. */
2185 for(j=0; j<uNumSrc; j++)
2186 {
2187 pSource[j] = &(pAsm->S[j].src);
2188 }
2189 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2190 {
2191 for(j=0; j<uNumSrc; j++)
2192 {
2193 switch (scalar_channel_index)
2194 {
2195 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2196 case 1: channel_swizzle = pSource[j]->swizzley; break;
2197 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2198 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2199 default: channel_swizzle = SQ_SEL_MASK; break;
2200 }
2201 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2202 (pSource[j]->rtype == SRC_REG_INPUT))
2203 && (channel_swizzle <= SQ_SEL_W) )
2204 {
2205 chan_counter[channel_swizzle]++;
2206 }
2207 }
2208 }
2209 if( (chan_counter[SQ_SEL_X] > 3)
2210 || (chan_counter[SQ_SEL_Y] > 3)
2211 || (chan_counter[SQ_SEL_Z] > 3)
2212 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2213 {
2214 bSplitInst = GL_TRUE;
2215 }
2216 #endif
2217 }
2218
2219 contiguous_slots_needed = 0;
2220
2221 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2222 {
2223 contiguous_slots_needed = 4;
2224 }
2225
2226 initialize(pAsm);
2227
2228 for (scalar_channel_index=0;
2229 scalar_channel_index < number_of_scalar_operations;
2230 scalar_channel_index++)
2231 {
2232 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2233 if (alu_instruction_ptr == NULL)
2234 {
2235 return GL_FALSE;
2236 }
2237 Init_R700ALUInstruction(alu_instruction_ptr);
2238
2239 //src 0
2240 current_source_index = 0;
2241 pcurrent_source = &(pAsm->S[0].src);
2242
2243 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2244 current_source_index,
2245 pcurrent_source,
2246 scalar_channel_index) )
2247 {
2248 return GL_FALSE;
2249 }
2250
2251 if (uNumSrc > 1)
2252 {
2253 // Process source 1
2254 current_source_index = 1;
2255 pcurrent_source = &(pAsm->S[current_source_index].src);
2256
2257 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2258 current_source_index,
2259 pcurrent_source,
2260 scalar_channel_index) )
2261 {
2262 return GL_FALSE;
2263 }
2264 }
2265
2266 //other bits
2267 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
2268
2269 if( (is_single_scalar_operation == GL_TRUE)
2270 || (GL_TRUE == bSplitInst) )
2271 {
2272 alu_instruction_ptr->m_Word0.f.last = 1;
2273 }
2274 else
2275 {
2276 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2277 }
2278
2279 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2280 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2281 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2282
2283 // dst
2284 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2285 (pAsm->D.dst.rtype == DST_REG_OUT) )
2286 {
2287 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2288 }
2289 else
2290 {
2291 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2292 return GL_FALSE;
2293 }
2294
2295 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2296
2297 if ( is_single_scalar_operation == GL_TRUE )
2298 {
2299 // Override scalar_channel_index since only one scalar value will be written
2300 if(pAsm->D.dst.writex)
2301 {
2302 scalar_channel_index = 0;
2303 }
2304 else if(pAsm->D.dst.writey)
2305 {
2306 scalar_channel_index = 1;
2307 }
2308 else if(pAsm->D.dst.writez)
2309 {
2310 scalar_channel_index = 2;
2311 }
2312 else if(pAsm->D.dst.writew)
2313 {
2314 scalar_channel_index = 3;
2315 }
2316 }
2317
2318 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2319
2320 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2321
2322 if (pAsm->D.dst.op3)
2323 {
2324 //op3
2325
2326 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2327
2328 //There's 3rd src for op3
2329 current_source_index = 2;
2330 pcurrent_source = &(pAsm->S[current_source_index].src);
2331
2332 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2333 current_source_index,
2334 pcurrent_source,
2335 scalar_channel_index) )
2336 {
2337 return GL_FALSE;
2338 }
2339 }
2340 else
2341 {
2342 //op2
2343 if (pAsm->bR6xx)
2344 {
2345 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2346
2347 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2348 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2349
2350 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2351 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2352 switch (scalar_channel_index)
2353 {
2354 case 0:
2355 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2356 break;
2357 case 1:
2358 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2359 break;
2360 case 2:
2361 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2362 break;
2363 case 3:
2364 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2365 break;
2366 default:
2367 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2368 break;
2369 }
2370 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2371 }
2372 else
2373 {
2374 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2375
2376 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2377 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2378
2379 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2380 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2381 switch (scalar_channel_index)
2382 {
2383 case 0:
2384 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2385 break;
2386 case 1:
2387 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2388 break;
2389 case 2:
2390 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2391 break;
2392 case 3:
2393 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2394 break;
2395 default:
2396 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2397 break;
2398 }
2399 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2400 }
2401 }
2402
2403 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2404 {
2405 return GL_FALSE;
2406 }
2407
2408 /*
2409 * Judge the type of current instruction, is it vector or scalar
2410 * instruction.
2411 */
2412 if (is_single_scalar_operation)
2413 {
2414 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2415 {
2416 return GL_FALSE;
2417 }
2418 }
2419 else
2420 {
2421 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2422 {
2423 return 1;
2424 }
2425 }
2426
2427 contiguous_slots_needed = 0;
2428 }
2429
2430 return GL_TRUE;
2431 }
2432
2433 GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
2434 {
2435 GLuint number_of_scalar_operations;
2436 GLboolean is_single_scalar_operation;
2437 GLuint scalar_channel_index;
2438
2439 PVSSRC * pcurrent_source;
2440 int current_source_index;
2441 GLuint contiguous_slots_needed;
2442
2443 GLuint uNumSrc = r700GetNumOperands(pAsm);
2444
2445 GLboolean bSplitInst = GL_FALSE;
2446
2447 if (1 == pAsm->D.dst.math)
2448 {
2449 is_single_scalar_operation = GL_TRUE;
2450 number_of_scalar_operations = 1;
2451 }
2452 else
2453 {
2454 is_single_scalar_operation = GL_FALSE;
2455 number_of_scalar_operations = 4;
2456 }
2457
2458 contiguous_slots_needed = 0;
2459
2460 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2461 {
2462 contiguous_slots_needed = 4;
2463 }
2464
2465 initialize(pAsm);
2466
2467 for (scalar_channel_index=0;
2468 scalar_channel_index < number_of_scalar_operations;
2469 scalar_channel_index++)
2470 {
2471 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2472 if (alu_instruction_ptr == NULL)
2473 {
2474 return GL_FALSE;
2475 }
2476 Init_R700ALUInstruction(alu_instruction_ptr);
2477
2478 //src 0
2479 current_source_index = 0;
2480 pcurrent_source = &(pAsm->S[0].src);
2481
2482 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2483 current_source_index,
2484 pcurrent_source,
2485 scalar_channel_index) )
2486 {
2487 return GL_FALSE;
2488 }
2489
2490 if (uNumSrc > 1)
2491 {
2492 // Process source 1
2493 current_source_index = 1;
2494 pcurrent_source = &(pAsm->S[current_source_index].src);
2495
2496 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2497 current_source_index,
2498 pcurrent_source,
2499 scalar_channel_index) )
2500 {
2501 return GL_FALSE;
2502 }
2503 }
2504
2505 //other bits
2506 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2507
2508 if( (is_single_scalar_operation == GL_TRUE)
2509 || (GL_TRUE == bSplitInst) )
2510 {
2511 alu_instruction_ptr->m_Word0.f.last = 1;
2512 }
2513 else
2514 {
2515 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2516 }
2517
2518 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2519 if(1 == pAsm->D.dst.predicated)
2520 {
2521 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2522 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2523 }
2524 else
2525 {
2526 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2527 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2528 }
2529
2530 // dst
2531 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2532 (pAsm->D.dst.rtype == DST_REG_OUT) )
2533 {
2534 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2535 }
2536 else
2537 {
2538 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2539 return GL_FALSE;
2540 }
2541
2542 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2543
2544 if ( is_single_scalar_operation == GL_TRUE )
2545 {
2546 // Override scalar_channel_index since only one scalar value will be written
2547 if(pAsm->D.dst.writex)
2548 {
2549 scalar_channel_index = 0;
2550 }
2551 else if(pAsm->D.dst.writey)
2552 {
2553 scalar_channel_index = 1;
2554 }
2555 else if(pAsm->D.dst.writez)
2556 {
2557 scalar_channel_index = 2;
2558 }
2559 else if(pAsm->D.dst.writew)
2560 {
2561 scalar_channel_index = 3;
2562 }
2563 }
2564
2565 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2566
2567 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2568
2569 if (pAsm->D.dst.op3)
2570 {
2571 //op3
2572
2573 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2574
2575 //There's 3rd src for op3
2576 current_source_index = 2;
2577 pcurrent_source = &(pAsm->S[current_source_index].src);
2578
2579 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2580 current_source_index,
2581 pcurrent_source,
2582 scalar_channel_index) )
2583 {
2584 return GL_FALSE;
2585 }
2586 }
2587 else
2588 {
2589 //op2
2590 if (pAsm->bR6xx)
2591 {
2592 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2593
2594 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2595 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2596
2597 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2598 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2599 switch (scalar_channel_index)
2600 {
2601 case 0:
2602 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2603 break;
2604 case 1:
2605 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2606 break;
2607 case 2:
2608 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2609 break;
2610 case 3:
2611 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2612 break;
2613 default:
2614 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2615 break;
2616 }
2617 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2618 }
2619 else
2620 {
2621 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2622
2623 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2624 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2625
2626 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2627 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2628 switch (scalar_channel_index)
2629 {
2630 case 0:
2631 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2632 break;
2633 case 1:
2634 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2635 break;
2636 case 2:
2637 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2638 break;
2639 case 3:
2640 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2641 break;
2642 default:
2643 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2644 break;
2645 }
2646 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2647 }
2648 }
2649
2650 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2651 {
2652 return GL_FALSE;
2653 }
2654
2655 /*
2656 * Judge the type of current instruction, is it vector or scalar
2657 * instruction.
2658 */
2659 if (is_single_scalar_operation)
2660 {
2661 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2662 {
2663 return GL_FALSE;
2664 }
2665 }
2666 else
2667 {
2668 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2669 {
2670 return 1;
2671 }
2672 }
2673
2674 contiguous_slots_needed = 0;
2675 }
2676
2677 return GL_TRUE;
2678 }
2679
2680 GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
2681 {
2682 R700ALUInstruction * alu_instruction_ptr;
2683 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2684 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2685
2686 GLuint number_of_scalar_operations;
2687 GLboolean is_single_scalar_operation;
2688 GLuint scalar_channel_index;
2689
2690 GLuint contiguous_slots_needed;
2691 GLuint lastInstruction;
2692 GLuint not_masked[4];
2693
2694 GLuint uNumSrc = r700GetNumOperands(pAsm);
2695
2696 GLboolean bSplitInst = GL_FALSE;
2697
2698 number_of_scalar_operations = 0;
2699 contiguous_slots_needed = 0;
2700
2701 if(1 == pAsm->D.dst.writew)
2702 {
2703 lastInstruction = 3;
2704 number_of_scalar_operations++;
2705 not_masked[3] = 1;
2706 }
2707 else
2708 {
2709 not_masked[3] = 0;
2710 }
2711 if(1 == pAsm->D.dst.writez)
2712 {
2713 lastInstruction = 2;
2714 number_of_scalar_operations++;
2715 not_masked[2] = 1;
2716 }
2717 else
2718 {
2719 not_masked[2] = 0;
2720 }
2721 if(1 == pAsm->D.dst.writey)
2722 {
2723 lastInstruction = 1;
2724 number_of_scalar_operations++;
2725 not_masked[1] = 1;
2726 }
2727 else
2728 {
2729 not_masked[1] = 0;
2730 }
2731 if(1 == pAsm->D.dst.writex)
2732 {
2733 lastInstruction = 0;
2734 number_of_scalar_operations++;
2735 not_masked[0] = 1;
2736 }
2737 else
2738 {
2739 not_masked[0] = 0;
2740 }
2741
2742 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2743 {
2744 contiguous_slots_needed = 4;
2745 }
2746 else
2747 {
2748 contiguous_slots_needed = number_of_scalar_operations;
2749 }
2750
2751 if(1 == pAsm->D2.dst2.literal)
2752 {
2753 contiguous_slots_needed += 1;
2754 }
2755 else if(2 == pAsm->D2.dst2.literal)
2756 {
2757 contiguous_slots_needed += 2;
2758 }
2759
2760 initialize(pAsm);
2761
2762 for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++)
2763 {
2764 if(0 == not_masked[scalar_channel_index])
2765 {
2766 continue;
2767 }
2768
2769 if(scalar_channel_index == lastInstruction)
2770 {
2771 switch (pAsm->D2.dst2.literal)
2772 {
2773 case 0:
2774 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2775 if (alu_instruction_ptr == NULL)
2776 {
2777 return GL_FALSE;
2778 }
2779 Init_R700ALUInstruction(alu_instruction_ptr);
2780 break;
2781 case 1:
2782 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2783 if (alu_instruction_ptr_hl == NULL)
2784 {
2785 return GL_FALSE;
2786 }
2787 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]);
2788 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2789 break;
2790 case 2:
2791 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2792 if (alu_instruction_ptr_fl == NULL)
2793 {
2794 return GL_FALSE;
2795 }
2796 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]);
2797 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2798 break;
2799 default:
2800 break;
2801 };
2802 }
2803 else
2804 {
2805 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2806 if (alu_instruction_ptr == NULL)
2807 {
2808 return GL_FALSE;
2809 }
2810 Init_R700ALUInstruction(alu_instruction_ptr);
2811 }
2812
2813 //src 0
2814 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2815 0,
2816 &(pAsm->S[0].src),
2817 scalar_channel_index) )
2818 {
2819 return GL_FALSE;
2820 }
2821
2822 if (uNumSrc > 1)
2823 {
2824 // Process source 1
2825 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2826 1,
2827 &(pAsm->S[1].src),
2828 scalar_channel_index) )
2829 {
2830 return GL_FALSE;
2831 }
2832 }
2833
2834 //other bits
2835 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2836
2837 if(scalar_channel_index == lastInstruction)
2838 {
2839 alu_instruction_ptr->m_Word0.f.last = 1;
2840 }
2841
2842 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2843 if(1 == pAsm->D.dst.predicated)
2844 {
2845 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2846 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2847 }
2848 else
2849 {
2850 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0;
2851 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0;
2852 }
2853
2854 // dst
2855 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2856 (pAsm->D.dst.rtype == DST_REG_OUT) )
2857 {
2858 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2859 }
2860 else
2861 {
2862 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2863 return GL_FALSE;
2864 }
2865
2866 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2867
2868 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2869
2870 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2871
2872 if (pAsm->D.dst.op3)
2873 {
2874 //op3
2875 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2876
2877 //There's 3rd src for op3
2878 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2879 2,
2880 &(pAsm->S[2].src),
2881 scalar_channel_index) )
2882 {
2883 return GL_FALSE;
2884 }
2885 }
2886 else
2887 {
2888 //op2
2889 if (pAsm->bR6xx)
2890 {
2891 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2892 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2893 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2894 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1;
2895 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2896 }
2897 else
2898 {
2899 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2900 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2901 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2902 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1;
2903 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2904 }
2905 }
2906
2907 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2908 {
2909 return GL_FALSE;
2910 }
2911
2912 if (1 == number_of_scalar_operations)
2913 {
2914 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2915 {
2916 return GL_FALSE;
2917 }
2918 }
2919 else
2920 {
2921 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2922 {
2923 return GL_FALSE;
2924 }
2925 }
2926
2927 contiguous_slots_needed -= 2;
2928 }
2929
2930 return GL_TRUE;
2931 }
2932
2933 GLboolean next_ins(r700_AssemblerBase *pAsm)
2934 {
2935 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2936
2937 if( GL_TRUE == pAsm->is_tex )
2938 {
2939 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2940 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2941 {
2942 radeon_error("Error assembling TEX instruction\n");
2943 return GL_FALSE;
2944 }
2945 } else {
2946 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2947 {
2948 radeon_error("Error assembling TEX instruction\n");
2949 return GL_FALSE;
2950 }
2951 }
2952 }
2953 else
2954 { //ALU
2955 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2956 {
2957 radeon_error("Error assembling ALU instruction\n");
2958 return GL_FALSE;
2959 }
2960 }
2961
2962 if(pAsm->D.dst.rtype == DST_REG_OUT)
2963 {
2964 if(pAsm->D.dst.op3)
2965 {
2966 // There is no mask for OP3 instructions, so all channels are written
2967 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2968 }
2969 else
2970 {
2971 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2972 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2973 }
2974 }
2975
2976 //reset for next inst.
2977 pAsm->D.bits = 0;
2978 pAsm->D2.bits = 0;
2979 pAsm->S[0].bits = 0;
2980 pAsm->S[1].bits = 0;
2981 pAsm->S[2].bits = 0;
2982 pAsm->is_tex = GL_FALSE;
2983 pAsm->need_tex_barrier = GL_FALSE;
2984
2985 return GL_TRUE;
2986 }
2987
2988 GLboolean next_ins2(r700_AssemblerBase *pAsm)
2989 {
2990 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2991
2992 //ALU
2993 if( GL_FALSE == assemble_alu_instruction2(pAsm) )
2994 {
2995 radeon_error("Error assembling ALU instruction\n");
2996 return GL_FALSE;
2997 }
2998
2999 if(pAsm->D.dst.rtype == DST_REG_OUT)
3000 {
3001 if(pAsm->D.dst.op3)
3002 {
3003 // There is no mask for OP3 instructions, so all channels are written
3004 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
3005 }
3006 else
3007 {
3008 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
3009 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
3010 }
3011 }
3012
3013 //reset for next inst.
3014 pAsm->D.bits = 0;
3015 pAsm->D2.bits = 0;
3016 pAsm->S[0].bits = 0;
3017 pAsm->S[1].bits = 0;
3018 pAsm->S[2].bits = 0;
3019 pAsm->is_tex = GL_FALSE;
3020 pAsm->need_tex_barrier = GL_FALSE;
3021
3022 //richard nov.16 glsl
3023 pAsm->D2.bits = 0;
3024
3025 return GL_TRUE;
3026 }
3027
3028 /* not work yet */
3029 GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
3030 {
3031 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3032
3033 //ALU
3034 if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) )
3035 {
3036 radeon_error("Error assembling ALU instruction\n");
3037 return GL_FALSE;
3038 }
3039
3040 //reset for next inst.
3041 pAsm->D.bits = 0;
3042 pAsm->D2.bits = 0;
3043 pAsm->S[0].bits = 0;
3044 pAsm->S[1].bits = 0;
3045 pAsm->S[2].bits = 0;
3046 pAsm->is_tex = GL_FALSE;
3047 pAsm->need_tex_barrier = GL_FALSE;
3048 return GL_TRUE;
3049 }
3050
3051 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3052 {
3053 BITS tmp;
3054
3055 checkop1(pAsm);
3056
3057 tmp = gethelpr(pAsm);
3058
3059 // opcode tmp.x, a.x
3060 // MOV dst, tmp.x
3061
3062 pAsm->D.dst.opcode = opcode;
3063 pAsm->D.dst.math = 1;
3064
3065 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3066 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3067 pAsm->D.dst.reg = tmp;
3068 pAsm->D.dst.writex = 1;
3069
3070 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3071 {
3072 return GL_FALSE;
3073 }
3074
3075 if ( GL_FALSE == next_ins(pAsm) )
3076 {
3077 return GL_FALSE;
3078 }
3079
3080 // Now replicate result to all necessary channels in destination
3081 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3082
3083 if( GL_FALSE == assemble_dst(pAsm) )
3084 {
3085 return GL_FALSE;
3086 }
3087
3088 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3089 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3090 pAsm->S[0].src.reg = tmp;
3091
3092 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3093 noneg_PVSSRC(&(pAsm->S[0].src));
3094
3095 if( GL_FALSE == next_ins(pAsm) )
3096 {
3097 return GL_FALSE;
3098 }
3099
3100 return GL_TRUE;
3101 }
3102
3103 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3104 {
3105 checkop1(pAsm);
3106
3107 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3108
3109 if( GL_FALSE == assemble_dst(pAsm) )
3110 {
3111 return GL_FALSE;
3112 }
3113 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3114 {
3115 return GL_FALSE;
3116 }
3117
3118 pAsm->S[1].bits = pAsm->S[0].bits;
3119 flipneg_PVSSRC(&(pAsm->S[1].src));
3120
3121 if ( GL_FALSE == next_ins(pAsm) )
3122 {
3123 return GL_FALSE;
3124 }
3125
3126 return GL_TRUE;
3127 }
3128
3129 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3130 {
3131 if( GL_FALSE == checkop2(pAsm) )
3132 {
3133 return GL_FALSE;
3134 }
3135
3136 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3137
3138 if( GL_FALSE == assemble_dst(pAsm) )
3139 {
3140 return GL_FALSE;
3141 }
3142
3143 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3144 {
3145 return GL_FALSE;
3146 }
3147
3148 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3149 {
3150 return GL_FALSE;
3151 }
3152
3153 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3154 {
3155 flipneg_PVSSRC(&(pAsm->S[1].src));
3156 }
3157
3158 if( GL_FALSE == next_ins(pAsm) )
3159 {
3160 return GL_FALSE;
3161 }
3162
3163 return GL_TRUE;
3164 }
3165
3166 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3167 { /* TODO: ar values dont' persist between clauses */
3168 if( GL_FALSE == checkop1(pAsm) )
3169 {
3170 return GL_FALSE;
3171 }
3172
3173 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3174 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3175 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3176 pAsm->D.dst.reg = 0;
3177 pAsm->D.dst.writex = 0;
3178 pAsm->D.dst.writey = 0;
3179 pAsm->D.dst.writez = 0;
3180 pAsm->D.dst.writew = 0;
3181
3182 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3183 {
3184 return GL_FALSE;
3185 }
3186
3187 if( GL_FALSE == next_ins(pAsm) )
3188 {
3189 return GL_FALSE;
3190 }
3191
3192 return GL_TRUE;
3193 }
3194
3195 GLboolean assemble_BAD(char *opcode_str)
3196 {
3197 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
3198 return GL_FALSE;
3199 }
3200
3201 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3202 {
3203 int tmp;
3204
3205 if( GL_FALSE == checkop3(pAsm) )
3206 {
3207 return GL_FALSE;
3208 }
3209
3210 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3211 pAsm->D.dst.op3 = 1;
3212
3213 tmp = (-1);
3214
3215 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3216 {
3217 //OP3 has no support for write mask
3218 tmp = gethelpr(pAsm);
3219
3220 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3221 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3222 pAsm->D.dst.reg = tmp;
3223
3224 nomask_PVSDST(&(pAsm->D.dst));
3225 }
3226 else
3227 {
3228 if( GL_FALSE == assemble_dst(pAsm) )
3229 {
3230 return GL_FALSE;
3231 }
3232 }
3233
3234 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3235 {
3236 return GL_FALSE;
3237 }
3238
3239 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
3240 {
3241 return GL_FALSE;
3242 }
3243
3244 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
3245 {
3246 return GL_FALSE;
3247 }
3248
3249 if ( GL_FALSE == next_ins(pAsm) )
3250 {
3251 return GL_FALSE;
3252 }
3253
3254 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3255 {
3256 if( GL_FALSE == assemble_dst(pAsm) )
3257 {
3258 return GL_FALSE;
3259 }
3260
3261 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3262
3263 //tmp for source
3264 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3265 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3266 pAsm->S[0].src.reg = tmp;
3267
3268 noneg_PVSSRC(&(pAsm->S[0].src));
3269 noswizzle_PVSSRC(&(pAsm->S[0].src));
3270
3271 if( GL_FALSE == next_ins(pAsm) )
3272 {
3273 return GL_FALSE;
3274 }
3275 }
3276
3277 return GL_TRUE;
3278 }
3279
3280 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
3281 {
3282 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
3283 }
3284
3285 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3286 {
3287 if( GL_FALSE == checkop2(pAsm) )
3288 {
3289 return GL_FALSE;
3290 }
3291
3292 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
3293
3294 if( GL_FALSE == assemble_dst(pAsm) )
3295 {
3296 return GL_FALSE;
3297 }
3298
3299 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3300 {
3301 return GL_FALSE;
3302 }
3303
3304 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3305 {
3306 return GL_FALSE;
3307 }
3308
3309 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3310 {
3311 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3312 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3313 }
3314 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3315 {
3316 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3317 }
3318
3319 if ( GL_FALSE == next_ins(pAsm) )
3320 {
3321 return GL_FALSE;
3322 }
3323
3324 return GL_TRUE;
3325 }
3326
3327 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3328 {
3329 if( GL_FALSE == checkop2(pAsm) )
3330 {
3331 return GL_FALSE;
3332 }
3333
3334 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3335
3336 if( GL_FALSE == assemble_dst(pAsm) )
3337 {
3338 return GL_FALSE;
3339 }
3340
3341 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3342 {
3343 return GL_FALSE;
3344 }
3345
3346 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3347 {
3348 return GL_FALSE;
3349 }
3350
3351 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3352 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3353
3354 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3355 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3356
3357 if ( GL_FALSE == next_ins(pAsm) )
3358 {
3359 return GL_FALSE;
3360 }
3361
3362 return GL_TRUE;
3363 }
3364
3365 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3366 {
3367 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3368 }
3369
3370 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3371 {
3372 BITS tmp;
3373
3374 checkop1(pAsm);
3375
3376 tmp = gethelpr(pAsm);
3377
3378 // FLOOR tmp.x, a.x
3379 // EX2 dst.x tmp.x
3380
3381 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3382 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3383
3384 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3385 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3386 pAsm->D.dst.reg = tmp;
3387 pAsm->D.dst.writex = 1;
3388
3389 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3390 {
3391 return GL_FALSE;
3392 }
3393
3394 if( GL_FALSE == next_ins(pAsm) )
3395 {
3396 return GL_FALSE;
3397 }
3398
3399 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3400 pAsm->D.dst.math = 1;
3401
3402 if( GL_FALSE == assemble_dst(pAsm) )
3403 {
3404 return GL_FALSE;
3405 }
3406
3407 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3408
3409 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3410 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3411 pAsm->S[0].src.reg = tmp;
3412
3413 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3414 noneg_PVSSRC(&(pAsm->S[0].src));
3415
3416 if( GL_FALSE == next_ins(pAsm) )
3417 {
3418 return GL_FALSE;
3419 }
3420 }
3421
3422 // FRACT dst.y a.x
3423
3424 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3425 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3426
3427 if( GL_FALSE == assemble_dst(pAsm) )
3428 {
3429 return GL_FALSE;
3430 }
3431
3432 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3433 {
3434 return GL_FALSE;
3435 }
3436
3437 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3438
3439 if( GL_FALSE == next_ins(pAsm) )
3440 {
3441 return GL_FALSE;
3442 }
3443 }
3444
3445 // EX2 dst.z, a.x
3446
3447 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3448 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3449 pAsm->D.dst.math = 1;
3450
3451 if( GL_FALSE == assemble_dst(pAsm) )
3452 {
3453 return GL_FALSE;
3454 }
3455
3456 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3457 {
3458 return GL_FALSE;
3459 }
3460
3461 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3462
3463 if( GL_FALSE == next_ins(pAsm) )
3464 {
3465 return GL_FALSE;
3466 }
3467 }
3468
3469 // MOV dst.w 1.0
3470
3471 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3472 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3473
3474 if( GL_FALSE == assemble_dst(pAsm) )
3475 {
3476 return GL_FALSE;
3477 }
3478
3479 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3480
3481 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3482 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3483 pAsm->S[0].src.reg = tmp;
3484
3485 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3486 noneg_PVSSRC(&(pAsm->S[0].src));
3487
3488 if( GL_FALSE == next_ins(pAsm) )
3489 {
3490 return GL_FALSE;
3491 }
3492 }
3493
3494 return GL_TRUE;
3495 }
3496
3497 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3498 {
3499 checkop1(pAsm);
3500
3501 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3502
3503 if ( GL_FALSE == assemble_dst(pAsm) )
3504 {
3505 return GL_FALSE;
3506 }
3507
3508 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3509 {
3510 return GL_FALSE;
3511 }
3512
3513 if ( GL_FALSE == next_ins(pAsm) )
3514 {
3515 return GL_FALSE;
3516 }
3517
3518 return GL_TRUE;
3519 }
3520
3521 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3522 {
3523 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3524 }
3525
3526 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3527 {
3528 checkop1(pAsm);
3529
3530 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3531
3532 if ( GL_FALSE == assemble_dst(pAsm) )
3533 {
3534 return GL_FALSE;
3535 }
3536
3537 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3538 {
3539 return GL_FALSE;
3540 }
3541
3542 if ( GL_FALSE == next_ins(pAsm) )
3543 {
3544 return GL_FALSE;
3545 }
3546
3547 return GL_TRUE;
3548 }
3549
3550 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
3551 {
3552 /* TODO: doc says KILL has to be last(end) ALU clause */
3553
3554 checkop1(pAsm);
3555
3556 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
3557
3558 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3559 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3560 pAsm->D.dst.reg = 0;
3561 pAsm->D.dst.writex = 0;
3562 pAsm->D.dst.writey = 0;
3563 pAsm->D.dst.writez = 0;
3564 pAsm->D.dst.writew = 0;
3565
3566 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3567 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3568 pAsm->S[0].src.reg = 0;
3569
3570 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3571 noneg_PVSSRC(&(pAsm->S[0].src));
3572
3573 if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
3574 {
3575 return GL_FALSE;
3576 }
3577
3578 if ( GL_FALSE == next_ins(pAsm) )
3579 {
3580 return GL_FALSE;
3581 }
3582
3583 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3584
3585 return GL_TRUE;
3586 }
3587
3588 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3589 {
3590 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3591 }
3592
3593 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3594 {
3595 BITS tmp;
3596
3597 if( GL_FALSE == checkop3(pAsm) )
3598 {
3599 return GL_FALSE;
3600 }
3601
3602 tmp = gethelpr(pAsm);
3603
3604 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3605
3606 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3607 pAsm->D.dst.reg = tmp;
3608 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3609 nomask_PVSDST(&(pAsm->D.dst));
3610
3611
3612 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3613 {
3614 return GL_FALSE;
3615 }
3616
3617 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3618 {
3619 return GL_FALSE;
3620 }
3621
3622 neg_PVSSRC(&(pAsm->S[1].src));
3623
3624 if( GL_FALSE == next_ins(pAsm) )
3625 {
3626 return GL_FALSE;
3627 }
3628
3629 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3630 pAsm->D.dst.op3 = 1;
3631
3632 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3633 pAsm->D.dst.reg = tmp;
3634 nomask_PVSDST(&(pAsm->D.dst));
3635 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3636
3637 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3638 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3639 pAsm->S[0].src.reg = tmp;
3640 noswizzle_PVSSRC(&(pAsm->S[0].src));
3641
3642
3643 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3644 {
3645 return GL_FALSE;
3646 }
3647 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3648 {
3649 return GL_FALSE;
3650 }
3651
3652 if( GL_FALSE == next_ins(pAsm) )
3653 {
3654 return GL_FALSE;
3655 }
3656
3657 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3658
3659 if( GL_FALSE == assemble_dst(pAsm) )
3660 {
3661 return GL_FALSE;
3662 }
3663
3664 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3665 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3666 pAsm->S[0].src.reg = tmp;
3667 noswizzle_PVSSRC(&(pAsm->S[0].src));
3668
3669 if( GL_FALSE == next_ins(pAsm) )
3670 {
3671 return GL_FALSE;
3672 }
3673
3674 return GL_TRUE;
3675 }
3676
3677 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3678 {
3679 BITS tmp1, tmp2, tmp3;
3680
3681 checkop1(pAsm);
3682
3683 tmp1 = gethelpr(pAsm);
3684 tmp2 = gethelpr(pAsm);
3685 tmp3 = gethelpr(pAsm);
3686
3687 // FIXME: The hardware can do fabs() directly on input
3688 // elements, but the compiler doesn't have the
3689 // capability to use that.
3690
3691 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3692
3693 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3694
3695 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3696 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3697 pAsm->D.dst.reg = tmp1;
3698 pAsm->D.dst.writex = 1;
3699
3700 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3701 {
3702 return GL_FALSE;
3703 }
3704
3705 pAsm->S[1].bits = pAsm->S[0].bits;
3706 flipneg_PVSSRC(&(pAsm->S[1].src));
3707
3708 if ( GL_FALSE == next_ins(pAsm) )
3709 {
3710 return GL_FALSE;
3711 }
3712
3713 // Entire algo:
3714 //
3715 // LG2 tmp2.x, tmp1.x
3716 // FLOOR tmp3.x, tmp2.x
3717 // MOV dst.x, tmp3.x
3718 // ADD tmp3.x, tmp2.x, -tmp3.x
3719 // EX2 dst.y, tmp3.x
3720 // MOV dst.z, tmp2.x
3721 // MOV dst.w, 1.0
3722
3723 // LG2 tmp2.x, tmp1.x
3724 // FLOOR tmp3.x, tmp2.x
3725
3726 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3727 pAsm->D.dst.math = 1;
3728
3729 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3730 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3731 pAsm->D.dst.reg = tmp2;
3732 pAsm->D.dst.writex = 1;
3733
3734 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3735 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3736 pAsm->S[0].src.reg = tmp1;
3737
3738 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3739 noneg_PVSSRC(&(pAsm->S[0].src));
3740
3741 if( GL_FALSE == next_ins(pAsm) )
3742 {
3743 return GL_FALSE;
3744 }
3745
3746 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3747
3748 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3749 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3750 pAsm->D.dst.reg = tmp3;
3751 pAsm->D.dst.writex = 1;
3752
3753 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3754 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3755 pAsm->S[0].src.reg = tmp2;
3756
3757 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3758 noneg_PVSSRC(&(pAsm->S[0].src));
3759
3760 if( GL_FALSE == next_ins(pAsm) )
3761 {
3762 return GL_FALSE;
3763 }
3764
3765 // MOV dst.x, tmp3.x
3766
3767 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3768
3769 if( GL_FALSE == assemble_dst(pAsm) )
3770 {
3771 return GL_FALSE;
3772 }
3773
3774 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3775
3776 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3777 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3778 pAsm->S[0].src.reg = tmp3;
3779
3780 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3781 noneg_PVSSRC(&(pAsm->S[0].src));
3782
3783 if( GL_FALSE == next_ins(pAsm) )
3784 {
3785 return GL_FALSE;
3786 }
3787
3788 // ADD tmp3.x, tmp2.x, -tmp3.x
3789 // EX2 dst.y, tmp3.x
3790
3791 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3792
3793 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3794 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3795 pAsm->D.dst.reg = tmp3;
3796 pAsm->D.dst.writex = 1;
3797
3798 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3799 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3800 pAsm->S[0].src.reg = tmp2;
3801
3802 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3803 noneg_PVSSRC(&(pAsm->S[0].src));
3804
3805 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3806 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3807 pAsm->S[1].src.reg = tmp3;
3808
3809 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3810 neg_PVSSRC(&(pAsm->S[1].src));
3811
3812 if( GL_FALSE == next_ins(pAsm) )
3813 {
3814 return GL_FALSE;
3815 }
3816
3817 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3818 pAsm->D.dst.math = 1;
3819
3820 if( GL_FALSE == assemble_dst(pAsm) )
3821 {
3822 return GL_FALSE;
3823 }
3824
3825 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3826
3827 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3828 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3829 pAsm->S[0].src.reg = tmp3;
3830
3831 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3832 noneg_PVSSRC(&(pAsm->S[0].src));
3833
3834 if( GL_FALSE == next_ins(pAsm) )
3835 {
3836 return GL_FALSE;
3837 }
3838
3839 // MOV dst.z, tmp2.x
3840
3841 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3842
3843 if( GL_FALSE == assemble_dst(pAsm) )
3844 {
3845 return GL_FALSE;
3846 }
3847
3848 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3849
3850 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3851 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3852 pAsm->S[0].src.reg = tmp2;
3853
3854 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3855 noneg_PVSSRC(&(pAsm->S[0].src));
3856
3857 if( GL_FALSE == next_ins(pAsm) )
3858 {
3859 return GL_FALSE;
3860 }
3861
3862 // MOV dst.w 1.0
3863
3864 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3865
3866 if( GL_FALSE == assemble_dst(pAsm) )
3867 {
3868 return GL_FALSE;
3869 }
3870
3871 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3872
3873 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3874 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3875 pAsm->S[0].src.reg = tmp1;
3876
3877 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3878 noneg_PVSSRC(&(pAsm->S[0].src));
3879
3880 if( GL_FALSE == next_ins(pAsm) )
3881 {
3882 return GL_FALSE;
3883 }
3884
3885 return GL_TRUE;
3886 }
3887
3888 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3889 {
3890 int tmp, ii;
3891 GLboolean bReplaceDst = GL_FALSE;
3892 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3893
3894 if( GL_FALSE == checkop3(pAsm) )
3895 {
3896 return GL_FALSE;
3897 }
3898
3899 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3900 pAsm->D.dst.op3 = 1;
3901
3902 tmp = (-1);
3903
3904 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3905 { /* TODO : more investigation on MAD src and dst using same register */
3906 for(ii=0; ii<3; ii++)
3907 {
3908 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3909 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3910 {
3911 bReplaceDst = GL_TRUE;
3912 break;
3913 }
3914 }
3915 }
3916 if(0xF != pILInst->DstReg.WriteMask)
3917 { /* OP3 has no support for write mask */
3918 bReplaceDst = GL_TRUE;
3919 }
3920
3921 if(GL_TRUE == bReplaceDst)
3922 {
3923 tmp = gethelpr(pAsm);
3924
3925 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3926 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3927 pAsm->D.dst.reg = tmp;
3928
3929 nomask_PVSDST(&(pAsm->D.dst));
3930 }
3931 else
3932 {
3933 if( GL_FALSE == assemble_dst(pAsm) )
3934 {
3935 return GL_FALSE;
3936 }
3937 }
3938
3939 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3940 {
3941 return GL_FALSE;
3942 }
3943
3944 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3945 {
3946 return GL_FALSE;
3947 }
3948
3949 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3950 {
3951 return GL_FALSE;
3952 }
3953
3954 if ( GL_FALSE == next_ins(pAsm) )
3955 {
3956 return GL_FALSE;
3957 }
3958
3959 if (GL_TRUE == bReplaceDst)
3960 {
3961 if( GL_FALSE == assemble_dst(pAsm) )
3962 {
3963 return GL_FALSE;
3964 }
3965
3966 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3967
3968 //tmp for source
3969 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3970 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3971 pAsm->S[0].src.reg = tmp;
3972
3973 noneg_PVSSRC(&(pAsm->S[0].src));
3974 noswizzle_PVSSRC(&(pAsm->S[0].src));
3975
3976 if( GL_FALSE == next_ins(pAsm) )
3977 {
3978 return GL_FALSE;
3979 }
3980 }
3981
3982 return GL_TRUE;
3983 }
3984
3985 /* LIT dst, src */
3986 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3987 {
3988 unsigned int dstReg;
3989 unsigned int dstType;
3990 unsigned int srcReg;
3991 unsigned int srcType;
3992 checkop1(pAsm);
3993 int tmp = gethelpr(pAsm);
3994
3995 if( GL_FALSE == assemble_dst(pAsm) )
3996 {
3997 return GL_FALSE;
3998 }
3999 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4000 {
4001 return GL_FALSE;
4002 }
4003 dstReg = pAsm->D.dst.reg;
4004 dstType = pAsm->D.dst.rtype;
4005 srcReg = pAsm->S[0].src.reg;
4006 srcType = pAsm->S[0].src.rtype;
4007
4008 /* dst.xw, <- 1.0 */
4009 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4010 pAsm->D.dst.rtype = dstType;
4011 pAsm->D.dst.reg = dstReg;
4012 pAsm->D.dst.writex = 1;
4013 pAsm->D.dst.writey = 0;
4014 pAsm->D.dst.writez = 0;
4015 pAsm->D.dst.writew = 1;
4016 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4017 pAsm->S[0].src.reg = tmp;
4018 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4019 noneg_PVSSRC(&(pAsm->S[0].src));
4020 pAsm->S[0].src.swizzlex = SQ_SEL_1;
4021 pAsm->S[0].src.swizzley = SQ_SEL_1;
4022 pAsm->S[0].src.swizzlez = SQ_SEL_1;
4023 pAsm->S[0].src.swizzlew = SQ_SEL_1;
4024 if( GL_FALSE == next_ins(pAsm) )
4025 {
4026 return GL_FALSE;
4027 }
4028
4029 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4030 {
4031 return GL_FALSE;
4032 }
4033
4034 /* dst.y = max(src.x, 0.0) */
4035 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4036 pAsm->D.dst.rtype = dstType;
4037 pAsm->D.dst.reg = dstReg;
4038 pAsm->D.dst.writex = 0;
4039 pAsm->D.dst.writey = 1;
4040 pAsm->D.dst.writez = 0;
4041 pAsm->D.dst.writew = 0;
4042 pAsm->S[0].src.rtype = srcType;
4043 pAsm->S[0].src.reg = srcReg;
4044 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4045 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4046 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4047 pAsm->S[1].src.reg = tmp;
4048 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4049 noneg_PVSSRC(&(pAsm->S[1].src));
4050 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4051 pAsm->S[1].src.swizzley = SQ_SEL_0;
4052 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4053 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4054 if( GL_FALSE == next_ins(pAsm) )
4055 {
4056 return GL_FALSE;
4057 }
4058
4059 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4060 {
4061 return GL_FALSE;
4062 }
4063
4064 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4065
4066 /* dst.z = log(src.y) */
4067 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
4068 pAsm->D.dst.math = 1;
4069 pAsm->D.dst.rtype = dstType;
4070 pAsm->D.dst.reg = dstReg;
4071 pAsm->D.dst.writex = 0;
4072 pAsm->D.dst.writey = 0;
4073 pAsm->D.dst.writez = 1;
4074 pAsm->D.dst.writew = 0;
4075 pAsm->S[0].src.rtype = srcType;
4076 pAsm->S[0].src.reg = srcReg;
4077 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4078 if( GL_FALSE == next_ins(pAsm) )
4079 {
4080 return GL_FALSE;
4081 }
4082
4083 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4084 {
4085 return GL_FALSE;
4086 }
4087
4088 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4089 {
4090 return GL_FALSE;
4091 }
4092
4093 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4094
4095 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4096
4097 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4098 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4099 pAsm->D.dst.math = 1;
4100 pAsm->D.dst.op3 = 1;
4101 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4102 pAsm->D.dst.reg = tmp;
4103 pAsm->D.dst.writex = 1;
4104 pAsm->D.dst.writey = 0;
4105 pAsm->D.dst.writez = 0;
4106 pAsm->D.dst.writew = 0;
4107
4108 pAsm->S[0].src.rtype = srcType;
4109 pAsm->S[0].src.reg = srcReg;
4110 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4111
4112 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4113 pAsm->S[1].src.reg = dstReg;
4114 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4115 noneg_PVSSRC(&(pAsm->S[1].src));
4116 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4117 pAsm->S[1].src.swizzley = SQ_SEL_Z;
4118 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4119 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4120
4121 pAsm->S[2].src.rtype = srcType;
4122 pAsm->S[2].src.reg = srcReg;
4123 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4124
4125 if( GL_FALSE == next_ins(pAsm) )
4126 {
4127 return GL_FALSE;
4128 }
4129
4130 /* dst.z = exp(tmp.x) */
4131 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4132 pAsm->D.dst.math = 1;
4133 pAsm->D.dst.rtype = dstType;
4134 pAsm->D.dst.reg = dstReg;
4135 pAsm->D.dst.writex = 0;
4136 pAsm->D.dst.writey = 0;
4137 pAsm->D.dst.writez = 1;
4138 pAsm->D.dst.writew = 0;
4139
4140 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4141 pAsm->S[0].src.reg = tmp;
4142 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4143 noneg_PVSSRC(&(pAsm->S[0].src));
4144 pAsm->S[0].src.swizzlex = SQ_SEL_X;
4145 pAsm->S[0].src.swizzley = SQ_SEL_X;
4146 pAsm->S[0].src.swizzlez = SQ_SEL_X;
4147 pAsm->S[0].src.swizzlew = SQ_SEL_X;
4148
4149 if( GL_FALSE == next_ins(pAsm) )
4150 {
4151 return GL_FALSE;
4152 }
4153
4154 return GL_TRUE;
4155 }
4156
4157 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
4158 {
4159 if( GL_FALSE == checkop2(pAsm) )
4160 {
4161 return GL_FALSE;
4162 }
4163
4164 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4165
4166 if( GL_FALSE == assemble_dst(pAsm) )
4167 {
4168 return GL_FALSE;
4169 }
4170
4171 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4172 {
4173 return GL_FALSE;
4174 }
4175
4176 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4177 {
4178 return GL_FALSE;
4179 }
4180
4181 if( GL_FALSE == next_ins(pAsm) )
4182 {
4183 return GL_FALSE;
4184 }
4185
4186 return GL_TRUE;
4187 }
4188
4189 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
4190 {
4191 if( GL_FALSE == checkop2(pAsm) )
4192 {
4193 return GL_FALSE;
4194 }
4195
4196 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
4197
4198 if( GL_FALSE == assemble_dst(pAsm) )
4199 {
4200 return GL_FALSE;
4201 }
4202
4203 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4204 {
4205 return GL_FALSE;
4206 }
4207
4208 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4209 {
4210 return GL_FALSE;
4211 }
4212
4213 if( GL_FALSE == next_ins(pAsm) )
4214 {
4215 return GL_FALSE;
4216 }
4217
4218 return GL_TRUE;
4219 }
4220
4221 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
4222 {
4223 checkop1(pAsm);
4224
4225 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4226
4227 if (GL_FALSE == assemble_dst(pAsm))
4228 {
4229 return GL_FALSE;
4230 }
4231
4232 if (GL_FALSE == assemble_src(pAsm, 0, -1))
4233 {
4234 return GL_FALSE;
4235 }
4236
4237 if ( GL_FALSE == next_ins(pAsm) )
4238 {
4239 return GL_FALSE;
4240 }
4241
4242 return GL_TRUE;
4243 }
4244
4245 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
4246 {
4247 if( GL_FALSE == checkop2(pAsm) )
4248 {
4249 return GL_FALSE;
4250 }
4251
4252 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4253
4254 if( GL_FALSE == assemble_dst(pAsm) )
4255 {
4256 return GL_FALSE;
4257 }
4258
4259 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4260 {
4261 return GL_FALSE;
4262 }
4263
4264 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4265 {
4266 return GL_FALSE;
4267 }
4268
4269 if( GL_FALSE == next_ins(pAsm) )
4270 {
4271 return GL_FALSE;
4272 }
4273
4274 return GL_TRUE;
4275 }
4276
4277 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
4278 {
4279 BITS tmp;
4280
4281 checkop1(pAsm);
4282
4283 tmp = gethelpr(pAsm);
4284
4285 // LG2 tmp.x, a.swizzle
4286 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4287 pAsm->D.dst.math = 1;
4288
4289 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4290 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4291 pAsm->D.dst.reg = tmp;
4292 nomask_PVSDST(&(pAsm->D.dst));
4293
4294 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4295 {
4296 return GL_FALSE;
4297 }
4298
4299 if( GL_FALSE == next_ins(pAsm) )
4300 {
4301 return GL_FALSE;
4302 }
4303
4304 // MUL tmp.x, tmp.x, b.swizzle
4305 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4306
4307 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4308 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4309 pAsm->D.dst.reg = tmp;
4310 nomask_PVSDST(&(pAsm->D.dst));
4311
4312 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4313 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4314 pAsm->S[0].src.reg = tmp;
4315 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4316 noneg_PVSSRC(&(pAsm->S[0].src));
4317
4318 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4319 {
4320 return GL_FALSE;
4321 }
4322
4323 if( GL_FALSE == next_ins(pAsm) )
4324 {
4325 return GL_FALSE;
4326 }
4327
4328 // EX2 dst.mask, tmp.x
4329 // EX2 tmp.x, tmp.x
4330 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4331 pAsm->D.dst.math = 1;
4332
4333 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4334 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4335 pAsm->D.dst.reg = tmp;
4336 nomask_PVSDST(&(pAsm->D.dst));
4337
4338 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4339 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4340 pAsm->S[0].src.reg = tmp;
4341 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4342 noneg_PVSSRC(&(pAsm->S[0].src));
4343
4344 if( GL_FALSE == next_ins(pAsm) )
4345 {
4346 return GL_FALSE;
4347 }
4348
4349 // Now replicate result to all necessary channels in destination
4350 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4351
4352 if( GL_FALSE == assemble_dst(pAsm) )
4353 {
4354 return GL_FALSE;
4355 }
4356
4357 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4358 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4359 pAsm->S[0].src.reg = tmp;
4360
4361 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4362 noneg_PVSSRC(&(pAsm->S[0].src));
4363
4364 if( GL_FALSE == next_ins(pAsm) )
4365 {
4366 return GL_FALSE;
4367 }
4368
4369 return GL_TRUE;
4370 }
4371
4372 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4373 {
4374 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4375 }
4376
4377 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4378 {
4379 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4380 }
4381
4382 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
4383 {
4384 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
4385 }
4386
4387 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4388 {
4389 BITS tmp;
4390
4391 checkop1(pAsm);
4392
4393 tmp = gethelpr(pAsm);
4394
4395 // COS tmp.x, a.x
4396 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4397 pAsm->D.dst.math = 1;
4398
4399 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4400 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4401 pAsm->D.dst.reg = tmp;
4402 pAsm->D.dst.writex = 1;
4403
4404 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4405 {
4406 return GL_FALSE;
4407 }
4408
4409 if ( GL_FALSE == next_ins(pAsm) )
4410 {
4411 return GL_FALSE;
4412 }
4413
4414 // SIN tmp.y, a.x
4415 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4416 pAsm->D.dst.math = 1;
4417
4418 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4419 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4420 pAsm->D.dst.reg = tmp;
4421 pAsm->D.dst.writey = 1;
4422
4423 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4424 {
4425 return GL_FALSE;
4426 }
4427
4428 if( GL_FALSE == next_ins(pAsm) )
4429 {
4430 return GL_FALSE;
4431 }
4432
4433 // MOV dst.mask, tmp
4434 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4435
4436 if( GL_FALSE == assemble_dst(pAsm) )
4437 {
4438 return GL_FALSE;
4439 }
4440
4441 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4442 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4443 pAsm->S[0].src.reg = tmp;
4444
4445 noswizzle_PVSSRC(&(pAsm->S[0].src));
4446 pAsm->S[0].src.swizzlez = SQ_SEL_0;
4447 pAsm->S[0].src.swizzlew = SQ_SEL_0;
4448
4449 if ( GL_FALSE == next_ins(pAsm) )
4450 {
4451 return GL_FALSE;
4452 }
4453
4454 return GL_TRUE;
4455 }
4456
4457 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4458 {
4459 if( GL_FALSE == checkop2(pAsm) )
4460 {
4461 return GL_FALSE;
4462 }
4463
4464 pAsm->D.dst.opcode = opcode;
4465 pAsm->D.dst.math = 1;
4466
4467 if( GL_FALSE == assemble_dst(pAsm) )
4468 {
4469 return GL_FALSE;
4470 }
4471
4472 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4473 {
4474 return GL_FALSE;
4475 }
4476
4477 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4478 {
4479 return GL_FALSE;
4480 }
4481
4482 if( GL_FALSE == next_ins(pAsm) )
4483 {
4484 return GL_FALSE;
4485 }
4486
4487 return GL_TRUE;
4488 }
4489
4490 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4491 {
4492 if( GL_FALSE == checkop2(pAsm) )
4493 {
4494 return GL_FALSE;
4495 }
4496
4497 pAsm->D.dst.opcode = opcode;
4498 pAsm->D.dst.math = 1;
4499 pAsm->D.dst.predicated = 1;
4500 pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
4501
4502 if( GL_FALSE == assemble_dst(pAsm) )
4503 {
4504 return GL_FALSE;
4505 }
4506
4507 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4508 {
4509 return GL_FALSE;
4510 }
4511
4512 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4513 {
4514 return GL_FALSE;
4515 }
4516
4517 if( GL_FALSE == next_ins2(pAsm) )
4518 {
4519 return GL_FALSE;
4520 }
4521
4522 return GL_TRUE;
4523 }
4524
4525 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4526 {
4527 if( GL_FALSE == checkop2(pAsm) )
4528 {
4529 return GL_FALSE;
4530 }
4531
4532 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4533
4534 if( GL_FALSE == assemble_dst(pAsm) )
4535 {
4536 return GL_FALSE;
4537 }
4538
4539 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4540 {
4541 return GL_FALSE;
4542 }
4543
4544 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4545 {
4546 return GL_FALSE;
4547 }
4548
4549 if( GL_FALSE == next_ins(pAsm) )
4550 {
4551 return GL_FALSE;
4552 }
4553
4554 return GL_TRUE;
4555 }
4556
4557 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4558 {
4559 if( GL_FALSE == checkop2(pAsm) )
4560 {
4561 return GL_FALSE;
4562 }
4563
4564 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4565
4566 if( GL_FALSE == assemble_dst(pAsm) )
4567 {
4568 return GL_FALSE;
4569 }
4570
4571 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4572 {
4573 return GL_FALSE;
4574 }
4575
4576 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4577 {
4578 return GL_FALSE;
4579 }
4580
4581 if( GL_FALSE == next_ins(pAsm) )
4582 {
4583 return GL_FALSE;
4584 }
4585
4586 return GL_TRUE;
4587 }
4588
4589 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4590 {
4591 return GL_TRUE;
4592 }
4593
4594 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4595 {
4596 GLboolean src_const;
4597 GLboolean need_barrier = GL_FALSE;
4598
4599 checkop1(pAsm);
4600
4601 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4602 {
4603 case PROGRAM_CONSTANT:
4604 case PROGRAM_LOCAL_PARAM:
4605 case PROGRAM_ENV_PARAM:
4606 case PROGRAM_STATE_VAR:
4607 src_const = GL_TRUE;
4608 break;
4609 case PROGRAM_TEMPORARY:
4610 case PROGRAM_INPUT:
4611 default:
4612 src_const = GL_FALSE;
4613 break;
4614 }
4615
4616 if (GL_TRUE == src_const)
4617 {
4618 if ( GL_FALSE == mov_temp(pAsm, 0) )
4619 return GL_FALSE;
4620 need_barrier = GL_TRUE;
4621 }
4622
4623 switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
4624 {
4625 case OPCODE_TEX:
4626 break;
4627 case OPCODE_TXB:
4628 radeon_error("do not support TXB yet\n");
4629 return GL_FALSE;
4630 break;
4631 case OPCODE_TXP:
4632 break;
4633 default:
4634 radeon_error("Internal error: bad texture op (not TEX)\n");
4635 return GL_FALSE;
4636 break;
4637 }
4638
4639 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4640 {
4641 GLuint tmp = gethelpr(pAsm);
4642 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4643 pAsm->D.dst.math = 1;
4644 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4645 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4646 pAsm->D.dst.reg = tmp;
4647 pAsm->D.dst.writew = 1;
4648
4649 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4650 {
4651 return GL_FALSE;
4652 }
4653 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4654 if( GL_FALSE == next_ins(pAsm) )
4655 {
4656 return GL_FALSE;
4657 }
4658
4659 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4660 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4661 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4662 pAsm->D.dst.reg = tmp;
4663 pAsm->D.dst.writex = 1;
4664 pAsm->D.dst.writey = 1;
4665 pAsm->D.dst.writez = 1;
4666 pAsm->D.dst.writew = 0;
4667
4668 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4669 {
4670 return GL_FALSE;
4671 }
4672 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4673 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4674 pAsm->S[1].src.reg = tmp;
4675 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4676
4677 if( GL_FALSE == next_ins(pAsm) )
4678 {
4679 return GL_FALSE;
4680 }
4681
4682 pAsm->aArgSubst[1] = tmp;
4683 need_barrier = GL_TRUE;
4684 }
4685
4686 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4687 {
4688 GLuint tmp1 = gethelpr(pAsm);
4689 GLuint tmp2 = gethelpr(pAsm);
4690
4691 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4692 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4693 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4694 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4695 pAsm->D.dst.reg = tmp1;
4696 nomask_PVSDST(&(pAsm->D.dst));
4697
4698 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4699 {
4700 return GL_FALSE;
4701 }
4702
4703 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4704 {
4705 return GL_FALSE;
4706 }
4707
4708 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4709 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4710
4711 if( GL_FALSE == next_ins(pAsm) )
4712 {
4713 return GL_FALSE;
4714 }
4715
4716 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
4717 * have to do explicit instruction
4718 */
4719 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4720 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4721 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4722 pAsm->D.dst.reg = tmp1;
4723 pAsm->D.dst.writez = 1;
4724
4725 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4726 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4727 pAsm->S[0].src.reg = tmp1;
4728 noswizzle_PVSSRC(&(pAsm->S[0].src));
4729 pAsm->S[1].bits = pAsm->S[0].bits;
4730 flipneg_PVSSRC(&(pAsm->S[1].src));
4731
4732 next_ins(pAsm);
4733
4734 /* tmp1.z = RCP_e(|tmp1.z|) */
4735 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4736 pAsm->D.dst.math = 1;
4737 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4738 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4739 pAsm->D.dst.reg = tmp1;
4740 pAsm->D.dst.writez = 1;
4741
4742 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4743 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4744 pAsm->S[0].src.reg = tmp1;
4745 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4746
4747 next_ins(pAsm);
4748
4749 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4750 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4751 * muladd has no writemask, have to use another temp
4752 * also no support for imm constants, so add 1 here
4753 */
4754 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4755 pAsm->D.dst.op3 = 1;
4756 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4757 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4758 pAsm->D.dst.reg = tmp2;
4759
4760 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4761 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4762 pAsm->S[0].src.reg = tmp1;
4763 noswizzle_PVSSRC(&(pAsm->S[0].src));
4764 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4765 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4766 pAsm->S[1].src.reg = tmp1;
4767 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4768 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4769 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4770 pAsm->S[2].src.reg = tmp1;
4771 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
4772
4773 next_ins(pAsm);
4774
4775 /* ADD the remaining .5 */
4776 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4777 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4778 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4779 pAsm->D.dst.reg = tmp2;
4780 pAsm->D.dst.writex = 1;
4781 pAsm->D.dst.writey = 1;
4782 pAsm->D.dst.writez = 0;
4783 pAsm->D.dst.writew = 0;
4784
4785 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4786 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4787 pAsm->S[0].src.reg = tmp2;
4788 noswizzle_PVSSRC(&(pAsm->S[0].src));
4789 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4790 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4791 pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
4792 noswizzle_PVSSRC(&(pAsm->S[1].src));
4793
4794 next_ins(pAsm);
4795
4796 /* tmp1.xy = temp2.xy */
4797 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4798 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4799 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4800 pAsm->D.dst.reg = tmp1;
4801 pAsm->D.dst.writex = 1;
4802 pAsm->D.dst.writey = 1;
4803 pAsm->D.dst.writez = 0;
4804 pAsm->D.dst.writew = 0;
4805
4806 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4807 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4808 pAsm->S[0].src.reg = tmp2;
4809 noswizzle_PVSSRC(&(pAsm->S[0].src));
4810
4811 next_ins(pAsm);
4812 pAsm->aArgSubst[1] = tmp1;
4813 need_barrier = GL_TRUE;
4814
4815 }
4816
4817 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4818 pAsm->is_tex = GL_TRUE;
4819 if ( GL_TRUE == need_barrier )
4820 {
4821 pAsm->need_tex_barrier = GL_TRUE;
4822 }
4823 // Set src1 to tex unit id
4824 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
4825 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4826
4827 //No sw info from mesa compiler, so hard code here.
4828 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4829 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4830 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4831 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4832
4833 if( GL_FALSE == tex_dst(pAsm) )
4834 {
4835 return GL_FALSE;
4836 }
4837
4838 if( GL_FALSE == tex_src(pAsm) )
4839 {
4840 return GL_FALSE;
4841 }
4842
4843 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4844 {
4845 /* hopefully did swizzles before */
4846 noswizzle_PVSSRC(&(pAsm->S[0].src));
4847 }
4848
4849 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4850 {
4851 /* SAMPLE dst, tmp.yxwy, CUBE */
4852 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4853 pAsm->S[0].src.swizzley = SQ_SEL_X;
4854 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4855 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4856 }
4857
4858 if ( GL_FALSE == next_ins(pAsm) )
4859 {
4860 return GL_FALSE;
4861 }
4862
4863 return GL_TRUE;
4864 }
4865
4866 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4867 {
4868 BITS tmp;
4869
4870 if( GL_FALSE == checkop2(pAsm) )
4871 {
4872 return GL_FALSE;
4873 }
4874
4875 tmp = gethelpr(pAsm);
4876
4877 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4878
4879 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4880 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4881 pAsm->D.dst.reg = tmp;
4882 nomask_PVSDST(&(pAsm->D.dst));
4883
4884 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4885 {
4886 return GL_FALSE;
4887 }
4888
4889 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4890 {
4891 return GL_FALSE;
4892 }
4893
4894 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4895 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4896
4897 if( GL_FALSE == next_ins(pAsm) )
4898 {
4899 return GL_FALSE;
4900 }
4901
4902 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4903 pAsm->D.dst.op3 = 1;
4904
4905 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4906 {
4907 tmp = gethelpr(pAsm);
4908
4909 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4910 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4911 pAsm->D.dst.reg = tmp;
4912
4913 nomask_PVSDST(&(pAsm->D.dst));
4914 }
4915 else
4916 {
4917 if( GL_FALSE == assemble_dst(pAsm) )
4918 {
4919 return GL_FALSE;
4920 }
4921 }
4922
4923 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4924 {
4925 return GL_FALSE;
4926 }
4927
4928 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4929 {
4930 return GL_FALSE;
4931 }
4932
4933 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4934 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4935
4936 // result1 + (neg) result0
4937 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4938 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4939 pAsm->S[2].src.reg = tmp;
4940
4941 neg_PVSSRC(&(pAsm->S[2].src));
4942 noswizzle_PVSSRC(&(pAsm->S[2].src));
4943
4944 if( GL_FALSE == next_ins(pAsm) )
4945 {
4946 return GL_FALSE;
4947 }
4948
4949
4950 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4951 {
4952 if( GL_FALSE == assemble_dst(pAsm) )
4953 {
4954 return GL_FALSE;
4955 }
4956
4957 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4958
4959 // Use tmp as source
4960 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4961 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4962 pAsm->S[0].src.reg = tmp;
4963
4964 noneg_PVSSRC(&(pAsm->S[0].src));
4965 noswizzle_PVSSRC(&(pAsm->S[0].src));
4966
4967 if( GL_FALSE == next_ins(pAsm) )
4968 {
4969 return GL_FALSE;
4970 }
4971 }
4972
4973 return GL_TRUE;
4974 }
4975
4976 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4977 {
4978 return GL_TRUE;
4979 }
4980
4981 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
4982 {
4983 if(GL_FALSE == add_cf_instruction(pAsm) )
4984 {
4985 return GL_FALSE;
4986 }
4987
4988 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
4989 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
4990 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
4991
4992 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
4993 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4994 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
4995 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4996
4997 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
4998
4999 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
5000
5001 return GL_TRUE;
5002 }
5003
5004 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5005 {
5006 if(GL_FALSE == add_cf_instruction(pAsm) )
5007 {
5008 return GL_FALSE;
5009 }
5010
5011 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5012 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5013 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5014
5015 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5016 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5017 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5018
5019 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5020
5021 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5022 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5023
5024 return GL_TRUE;
5025 }
5026
5027 GLboolean assemble_IF(r700_AssemblerBase *pAsm)
5028 {
5029 if(GL_FALSE == add_cf_instruction(pAsm) )
5030 {
5031 return GL_FALSE;
5032 }
5033
5034 if(GL_TRUE != bHasElse)
5035 {
5036 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5037 }
5038 else
5039 {
5040 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5041 }
5042 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5043 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5044
5045 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5046 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5047 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5048 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5049
5050 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5051
5052 pAsm->FCSP++;
5053 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
5054 pAsm->fc_stack[pAsm->FCSP].bpush = 0;
5055 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5056 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5057 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5058
5059 if(GL_TRUE != bHasElse)
5060 {
5061 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5062 }
5063
5064 pAsm->branch_depth++;
5065
5066 if(pAsm->branch_depth > pAsm->max_branch_depth)
5067 {
5068 pAsm->max_branch_depth = pAsm->branch_depth;
5069 }
5070 return GL_TRUE;
5071 }
5072
5073 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5074 {
5075 if(GL_FALSE == add_cf_instruction(pAsm) )
5076 {
5077 return GL_FALSE;
5078 }
5079
5080 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
5081 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5082 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5083
5084 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5085 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5086 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
5087 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5088
5089 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5090
5091 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5092 0,
5093 sizeof(R700ControlFlowGenericClause *) );
5094 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5095 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5096
5097 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5098
5099 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
5100
5101 return GL_TRUE;
5102 }
5103
5104 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5105 {
5106 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5107
5108 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5109 {
5110 /* no else in between */
5111 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5112 }
5113 else
5114 {
5115 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5116 }
5117
5118 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5119 {
5120 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5121 }
5122
5123 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5124 {
5125 radeon_error("if/endif in shader code are not paired. \n");
5126 return GL_FALSE;
5127 }
5128 pAsm->branch_depth--;
5129 pAsm->FCSP--;
5130
5131 return GL_TRUE;
5132 }
5133
5134 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5135 {
5136 if(GL_FALSE == add_cf_instruction(pAsm) )
5137 {
5138 return GL_FALSE;
5139 }
5140
5141
5142 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5143 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5144 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5145
5146 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5147 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5148 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
5149 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5150
5151 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5152
5153 pAsm->FCSP++;
5154 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
5155 pAsm->fc_stack[pAsm->FCSP].bpush = 1;
5156 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5157 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
5158 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
5159 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5160
5161 pAsm->branch_depth++;
5162
5163 if(pAsm->branch_depth > pAsm->max_branch_depth)
5164 {
5165 pAsm->max_branch_depth = pAsm->branch_depth;
5166 }
5167 return GL_TRUE;
5168 }
5169
5170 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
5171 {
5172 #ifdef USE_CF_FOR_CONTINUE_BREAK
5173 unsigned int unFCSP;
5174 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5175 {
5176 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5177 {
5178 break;
5179 }
5180 }
5181 if(0 == FC_LOOP)
5182 {
5183 radeon_error("Break is not inside loop/endloop pair.\n");
5184 return GL_FALSE;
5185 }
5186
5187 if(GL_FALSE == add_cf_instruction(pAsm) )
5188 {
5189 return GL_FALSE;
5190 }
5191
5192
5193 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5194 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5195 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5196
5197 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5198 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5199 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5200
5201 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5202
5203 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5204
5205 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5206 (void *)pAsm->fc_stack[unFCSP].mid,
5207 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5208 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5209 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5210 pAsm->fc_stack[unFCSP].unNumMid++;
5211
5212 if(GL_FALSE == add_cf_instruction(pAsm) )
5213 {
5214 return GL_FALSE;
5215 }
5216
5217 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5218 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5219 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5220
5221 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5222 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5223 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5224
5225 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5226
5227 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5228 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5229
5230 #endif //USE_CF_FOR_CONTINUE_BREAK
5231 return GL_TRUE;
5232 }
5233
5234 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
5235 {
5236 #ifdef USE_CF_FOR_CONTINUE_BREAK
5237 unsigned int unFCSP;
5238 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5239 {
5240 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5241 {
5242 break;
5243 }
5244 }
5245 if(0 == FC_LOOP)
5246 {
5247 radeon_error("Continue is not inside loop/endloop pair.\n");
5248 return GL_FALSE;
5249 }
5250
5251 if(GL_FALSE == add_cf_instruction(pAsm) )
5252 {
5253 return GL_FALSE;
5254 }
5255
5256
5257 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5258 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5259 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5260
5261 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5262 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5263 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
5264
5265 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5266
5267 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5268
5269 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5270 (void *)pAsm->fc_stack[unFCSP].mid,
5271 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5272 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5273 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5274 pAsm->fc_stack[unFCSP].unNumMid++;
5275
5276 if(GL_FALSE == add_cf_instruction(pAsm) )
5277 {
5278 return GL_FALSE;
5279 }
5280
5281 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5282 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5283 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5284
5285 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5286 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5287 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5288
5289 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5290
5291 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5292 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5293
5294 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5295
5296 return GL_TRUE;
5297 }
5298
5299 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
5300 {
5301 GLuint i;
5302
5303 if(GL_FALSE == add_cf_instruction(pAsm) )
5304 {
5305 return GL_FALSE;
5306 }
5307
5308
5309 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5310 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5311 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5312
5313 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5314 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5315 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
5316 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5317
5318 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5319
5320 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
5321 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5322
5323 #ifdef USE_CF_FOR_CONTINUE_BREAK
5324 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
5325 {
5326 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
5327 }
5328 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5329 {
5330 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5331 }
5332 #endif
5333
5334 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
5335 {
5336 radeon_error("loop/endloop in shader code are not paired. \n");
5337 return GL_FALSE;
5338 }
5339
5340 unsigned int unFCSP = 0;
5341 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
5342 {
5343 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5344 {
5345 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5346 {
5347 break;
5348 }
5349 }
5350 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
5351 {
5352 unFCSP = 0;
5353
5354 returnOnFlag(pAsm);
5355 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
5356 }
5357 }
5358
5359 pAsm->branch_depth--;
5360 pAsm->FCSP--;
5361
5362 if(unFCSP > 0)
5363 {
5364 breakLoopOnFlag(pAsm, unFCSP);
5365 }
5366
5367 return GL_TRUE;
5368 }
5369
5370 void add_return_inst(r700_AssemblerBase *pAsm)
5371 {
5372 if(GL_FALSE == add_cf_instruction(pAsm) )
5373 {
5374 return GL_FALSE;
5375 }
5376 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5377 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5378 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5379 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5380
5381 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5382 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5383 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
5384 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5385
5386 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5387 }
5388
5389 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
5390 {
5391 /* Put in sub */
5392 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
5393 {
5394 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
5395 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
5396 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
5397 if(NULL == pAsm->subs)
5398 {
5399 return GL_FALSE;
5400 }
5401 pAsm->unSubArraySize += 10;
5402 }
5403
5404 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
5405 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
5406 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
5407 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
5408
5409 pAsm->CALLSP++;
5410 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
5411 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5412 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5413 SetActiveCFlist(pAsm->pR700Shader,
5414 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5415
5416 pAsm->unSubArrayPointer++;
5417
5418 /* start sub */
5419 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5420
5421 return GL_TRUE;
5422 }
5423
5424 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5425 {
5426 pAsm->CALLSP--;
5427 SetActiveCFlist(pAsm->pR700Shader,
5428 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5429
5430 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5431
5432 return GL_TRUE;
5433 }
5434
5435 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5436 {
5437 if(pAsm->CALLSP > 0)
5438 { /* in sub */
5439 unsigned int unFCSP;
5440 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5441 {
5442 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5443 {
5444 setRetInLoopFlag(pAsm, SQ_SEL_1);
5445 breakLoopOnFlag(pAsm, unFCSP);
5446 pAsm->unCFflags |= LOOPRET_FLAGS;
5447
5448 return GL_TRUE;
5449 }
5450 }
5451 }
5452
5453 add_return_inst(pAsm);
5454
5455 return GL_TRUE;
5456 }
5457
5458 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5459 GLint nILindex,
5460 GLuint uiNumberInsts,
5461 struct prog_instruction *pILInst)
5462 {
5463 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5464
5465 if(GL_FALSE == add_cf_instruction(pAsm) )
5466 {
5467 return GL_FALSE;
5468 }
5469
5470 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5471 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5472 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5473 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5474
5475 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5476 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5477 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5478 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5479
5480 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5481
5482 /* Put in caller */
5483 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5484 {
5485 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5486 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5487 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5488 if(NULL == pAsm->callers)
5489 {
5490 return GL_FALSE;
5491 }
5492 pAsm->unCallerArraySize += 10;
5493 }
5494
5495 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
5496 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5497
5498 pAsm->unCallerArrayPointer++;
5499
5500 int j;
5501 for(j=0; j<pAsm->unSubArrayPointer; j++)
5502 {
5503 if(nILindex == pAsm->subs[j].subIL_Offset)
5504 { /* compiled before */
5505 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5506 return GL_TRUE;
5507 }
5508 }
5509
5510 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5511
5512 return AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
5513 }
5514
5515 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5516 {
5517 GLfloat fLiteral[2] = {0.1, 0.0};
5518
5519 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5520 pAsm->D.dst.op3 = 0;
5521 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5522 pAsm->D.dst.reg = pAsm->flag_reg_index;
5523 pAsm->D.dst.writex = 1;
5524 pAsm->D.dst.writey = 0;
5525 pAsm->D.dst.writez = 0;
5526 pAsm->D.dst.writew = 0;
5527 pAsm->D2.dst2.literal = 1;
5528 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5529 pAsm->D.dst.predicated = 0;
5530 #if 0
5531 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5532 //pAsm->S[0].src.reg = 0;
5533 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5534 noneg_PVSSRC(&(pAsm->S[0].src));
5535 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5536 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5537 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5538 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5539
5540 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5541 {
5542 return GL_FALSE;
5543 }
5544 #else
5545 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5546 pAsm->S[0].src.reg = 0;
5547 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5548 noneg_PVSSRC(&(pAsm->S[0].src));
5549 pAsm->S[0].src.swizzlex = flagValue;
5550 pAsm->S[0].src.swizzley = flagValue;
5551 pAsm->S[0].src.swizzlez = flagValue;
5552 pAsm->S[0].src.swizzlew = flagValue;
5553
5554 if( GL_FALSE == next_ins2(pAsm) )
5555 {
5556 return GL_FALSE;
5557 }
5558 #endif
5559
5560 return GL_TRUE;
5561 }
5562
5563 GLboolean testFlag(r700_AssemblerBase *pAsm)
5564 {
5565 GLfloat fLiteral[2] = {0.1, 0.0};
5566
5567 //Test flag
5568 GLuint tmp = gethelpr(pAsm);
5569 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5570
5571 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5572 pAsm->D.dst.math = 1;
5573 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5574 pAsm->D.dst.reg = tmp;
5575 pAsm->D.dst.writex = 1;
5576 pAsm->D.dst.writey = 0;
5577 pAsm->D.dst.writez = 0;
5578 pAsm->D.dst.writew = 0;
5579 pAsm->D2.dst2.literal = 1;
5580 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5581 pAsm->D.dst.predicated = 1;
5582
5583 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5584 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5585 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5586 noneg_PVSSRC(&(pAsm->S[0].src));
5587 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5588 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5589 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5590 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5591 #if 0
5592 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5593 //pAsm->S[1].src.reg = 0;
5594 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5595 noneg_PVSSRC(&(pAsm->S[1].src));
5596 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5597 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5598 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5599 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5600
5601 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5602 {
5603 return GL_FALSE;
5604 }
5605 #else
5606 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5607 pAsm->S[1].src.reg = 0;
5608 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5609 noneg_PVSSRC(&(pAsm->S[1].src));
5610 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5611 pAsm->S[1].src.swizzley = SQ_SEL_1;
5612 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5613 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5614
5615 if( GL_FALSE == next_ins2(pAsm) )
5616 {
5617 return GL_FALSE;
5618 }
5619 #endif
5620
5621 return GL_TRUE;
5622 }
5623
5624 GLboolean returnOnFlag(r700_AssemblerBase *pAsm)
5625 {
5626 testFlag(pAsm);
5627 jumpToOffest(pAsm, 1, 4);
5628 setRetInLoopFlag(pAsm, SQ_SEL_0);
5629 pops(pAsm, 1);
5630 add_return_inst(pAsm);
5631
5632 return GL_TRUE;
5633 }
5634
5635 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5636 {
5637 testFlag(pAsm);
5638
5639 //break
5640 if(GL_FALSE == add_cf_instruction(pAsm) )
5641 {
5642 return GL_FALSE;
5643 }
5644
5645 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5646 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5647 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5648
5649 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5650 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5651 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5652 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5653
5654 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5655
5656 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5657 (void *)pAsm->fc_stack[unFCSP].mid,
5658 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5659 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5660 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5661 pAsm->fc_stack[unFCSP].unNumMid++;
5662
5663 pops(pAsm, 1);
5664
5665 return GL_TRUE;
5666 }
5667
5668 GLboolean AssembleInstr(GLuint uiFirstInst,
5669 GLuint uiNumberInsts,
5670 struct prog_instruction *pILInst,
5671 r700_AssemblerBase *pR700AsmCode)
5672 {
5673 GLuint i;
5674
5675 pR700AsmCode->pILInst = pILInst;
5676 for(i=uiFirstInst; i<uiNumberInsts; i++)
5677 {
5678 pR700AsmCode->uiCurInst = i;
5679
5680 #ifndef USE_CF_FOR_CONTINUE_BREAK
5681 if(OPCODE_BRK == pILInst[i+1].Opcode)
5682 {
5683 switch(pILInst[i].Opcode)
5684 {
5685 case OPCODE_SLE:
5686 pILInst[i].Opcode = OPCODE_SGT;
5687 break;
5688 case OPCODE_SLT:
5689 pILInst[i].Opcode = OPCODE_SGE;
5690 break;
5691 case OPCODE_SGE:
5692 pILInst[i].Opcode = OPCODE_SLT;
5693 break;
5694 case OPCODE_SGT:
5695 pILInst[i].Opcode = OPCODE_SLE;
5696 break;
5697 case OPCODE_SEQ:
5698 pILInst[i].Opcode = OPCODE_SNE;
5699 break;
5700 case OPCODE_SNE:
5701 pILInst[i].Opcode = OPCODE_SEQ;
5702 break;
5703 default:
5704 break;
5705 }
5706 }
5707 #endif
5708
5709 switch (pILInst[i].Opcode)
5710 {
5711 case OPCODE_ABS:
5712 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5713 return GL_FALSE;
5714 break;
5715 case OPCODE_ADD:
5716 case OPCODE_SUB:
5717 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5718 return GL_FALSE;
5719 break;
5720
5721 case OPCODE_ARL:
5722 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5723 return GL_FALSE;
5724 break;
5725 case OPCODE_ARR:
5726 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5727 //if ( GL_FALSE == assemble_BAD("ARR") )
5728 return GL_FALSE;
5729 break;
5730
5731 case OPCODE_CMP:
5732 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5733 return GL_FALSE;
5734 break;
5735 case OPCODE_COS:
5736 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
5737 return GL_FALSE;
5738 break;
5739
5740 case OPCODE_DP3:
5741 case OPCODE_DP4:
5742 case OPCODE_DPH:
5743 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5744 return GL_FALSE;
5745 break;
5746
5747 case OPCODE_DST:
5748 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5749 return GL_FALSE;
5750 break;
5751
5752 case OPCODE_EX2:
5753 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5754 return GL_FALSE;
5755 break;
5756 case OPCODE_EXP:
5757 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5758 return GL_FALSE;
5759 break;
5760
5761 case OPCODE_FLR:
5762 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5763 return GL_FALSE;
5764 break;
5765 //case OP_FLR_INT:
5766 // if ( GL_FALSE == assemble_FLR_INT() )
5767 // return GL_FALSE;
5768 // break;
5769
5770 case OPCODE_FRC:
5771 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5772 return GL_FALSE;
5773 break;
5774
5775 case OPCODE_KIL:
5776 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
5777 return GL_FALSE;
5778 break;
5779 case OPCODE_LG2:
5780 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5781 return GL_FALSE;
5782 break;
5783 case OPCODE_LIT:
5784 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5785 return GL_FALSE;
5786 break;
5787 case OPCODE_LRP:
5788 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5789 return GL_FALSE;
5790 break;
5791 case OPCODE_LOG:
5792 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5793 return GL_FALSE;
5794 break;
5795
5796 case OPCODE_MAD:
5797 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5798 return GL_FALSE;
5799 break;
5800 case OPCODE_MAX:
5801 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5802 return GL_FALSE;
5803 break;
5804 case OPCODE_MIN:
5805 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5806 return GL_FALSE;
5807 break;
5808
5809 case OPCODE_MOV:
5810 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5811 return GL_FALSE;
5812 break;
5813 case OPCODE_MUL:
5814 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5815 return GL_FALSE;
5816 break;
5817
5818 case OPCODE_POW:
5819 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5820 return GL_FALSE;
5821 break;
5822 case OPCODE_RCP:
5823 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5824 return GL_FALSE;
5825 break;
5826 case OPCODE_RSQ:
5827 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5828 return GL_FALSE;
5829 break;
5830 case OPCODE_SIN:
5831 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
5832 return GL_FALSE;
5833 break;
5834 case OPCODE_SCS:
5835 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5836 return GL_FALSE;
5837 break;
5838
5839 case OPCODE_SEQ:
5840 if(OPCODE_IF == pILInst[i+1].Opcode)
5841 {
5842 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5843 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5844 {
5845 return GL_FALSE;
5846 }
5847 }
5848 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5849 {
5850 #ifdef USE_CF_FOR_CONTINUE_BREAK
5851 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5852 #else
5853 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5854 #endif
5855 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5856 {
5857 return GL_FALSE;
5858 }
5859 }
5860 else if(OPCODE_CONT == pILInst[i+1].Opcode)
5861 {
5862 #ifdef USE_CF_FOR_CONTINUE_BREAK
5863 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5864 #else
5865 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
5866 #endif
5867 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5868 {
5869 return GL_FALSE;
5870 }
5871 }
5872 else
5873 {
5874 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
5875 {
5876 return GL_FALSE;
5877 }
5878 }
5879 break;
5880
5881 case OPCODE_SGT:
5882 if(OPCODE_IF == pILInst[i+1].Opcode)
5883 {
5884 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5885 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5886 {
5887 return GL_FALSE;
5888 }
5889 }
5890 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5891 {
5892 #ifdef USE_CF_FOR_CONTINUE_BREAK
5893 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5894 #else
5895 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5896 #endif
5897 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5898 {
5899 return GL_FALSE;
5900 }
5901 }
5902 else if(OPCODE_CONT == pILInst[i+1].Opcode)
5903 {
5904 #ifdef USE_CF_FOR_CONTINUE_BREAK
5905 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5906 #else
5907 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
5908 #endif
5909
5910 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5911 {
5912 return GL_FALSE;
5913 }
5914 }
5915 else
5916 {
5917 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5918 {
5919 return GL_FALSE;
5920 }
5921 }
5922 break;
5923
5924 case OPCODE_SGE:
5925 if(OPCODE_IF == pILInst[i+1].Opcode)
5926 {
5927 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5928 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
5929 {
5930 return GL_FALSE;
5931 }
5932 }
5933 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5934 {
5935 #ifdef USE_CF_FOR_CONTINUE_BREAK
5936 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5937 #else
5938 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5939 #endif
5940 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
5941 {
5942 return GL_FALSE;
5943 }
5944 }
5945 else if(OPCODE_CONT == pILInst[i+1].Opcode)
5946 {
5947 #ifdef USE_CF_FOR_CONTINUE_BREAK
5948 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5949 #else
5950 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
5951 #endif
5952
5953 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
5954 {
5955 return GL_FALSE;
5956 }
5957 }
5958 else
5959 {
5960 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
5961 {
5962 return GL_FALSE;
5963 }
5964 }
5965 break;
5966
5967 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
5968 case OPCODE_SLT:
5969 {
5970 struct prog_src_register SrcRegSave[2];
5971 SrcRegSave[0] = pILInst[i].SrcReg[0];
5972 SrcRegSave[1] = pILInst[i].SrcReg[1];
5973 pILInst[i].SrcReg[0] = SrcRegSave[1];
5974 pILInst[i].SrcReg[1] = SrcRegSave[0];
5975 if(OPCODE_IF == pILInst[i+1].Opcode)
5976 {
5977 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5978 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5979 {
5980 pILInst[i].SrcReg[0] = SrcRegSave[0];
5981 pILInst[i].SrcReg[1] = SrcRegSave[1];
5982 return GL_FALSE;
5983 }
5984 }
5985 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5986 {
5987 #ifdef USE_CF_FOR_CONTINUE_BREAK
5988 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5989 #else
5990 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5991 #endif
5992 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5993 {
5994 pILInst[i].SrcReg[0] = SrcRegSave[0];
5995 pILInst[i].SrcReg[1] = SrcRegSave[1];
5996 return GL_FALSE;
5997 }
5998 }
5999 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6000 {
6001 #ifdef USE_CF_FOR_CONTINUE_BREAK
6002 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6003 #else
6004 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6005 #endif
6006
6007 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6008 {
6009 pILInst[i].SrcReg[0] = SrcRegSave[0];
6010 pILInst[i].SrcReg[1] = SrcRegSave[1];
6011 return GL_FALSE;
6012 }
6013 }
6014 else
6015 {
6016 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
6017 {
6018 pILInst[i].SrcReg[0] = SrcRegSave[0];
6019 pILInst[i].SrcReg[1] = SrcRegSave[1];
6020 return GL_FALSE;
6021 }
6022 }
6023 pILInst[i].SrcReg[0] = SrcRegSave[0];
6024 pILInst[i].SrcReg[1] = SrcRegSave[1];
6025 }
6026 break;
6027
6028 case OPCODE_SLE:
6029 {
6030 struct prog_src_register SrcRegSave[2];
6031 SrcRegSave[0] = pILInst[i].SrcReg[0];
6032 SrcRegSave[1] = pILInst[i].SrcReg[1];
6033 pILInst[i].SrcReg[0] = SrcRegSave[1];
6034 pILInst[i].SrcReg[1] = SrcRegSave[0];
6035 if(OPCODE_IF == pILInst[i+1].Opcode)
6036 {
6037 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6038 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6039 {
6040 pILInst[i].SrcReg[0] = SrcRegSave[0];
6041 pILInst[i].SrcReg[1] = SrcRegSave[1];
6042 return GL_FALSE;
6043 }
6044 }
6045 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6046 {
6047 #ifdef USE_CF_FOR_CONTINUE_BREAK
6048 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6049 #else
6050 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6051 #endif
6052 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6053 {
6054 pILInst[i].SrcReg[0] = SrcRegSave[0];
6055 pILInst[i].SrcReg[1] = SrcRegSave[1];
6056 return GL_FALSE;
6057 }
6058 }
6059 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6060 {
6061 #ifdef USE_CF_FOR_CONTINUE_BREAK
6062 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6063 #else
6064 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6065 #endif
6066
6067 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6068 {
6069 pILInst[i].SrcReg[0] = SrcRegSave[0];
6070 pILInst[i].SrcReg[1] = SrcRegSave[1];
6071 return GL_FALSE;
6072 }
6073 }
6074 else
6075 {
6076 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
6077 {
6078 pILInst[i].SrcReg[0] = SrcRegSave[0];
6079 pILInst[i].SrcReg[1] = SrcRegSave[1];
6080 return GL_FALSE;
6081 }
6082 }
6083 pILInst[i].SrcReg[0] = SrcRegSave[0];
6084 pILInst[i].SrcReg[1] = SrcRegSave[1];
6085 }
6086 break;
6087
6088 case OPCODE_SNE:
6089 if(OPCODE_IF == pILInst[i+1].Opcode)
6090 {
6091 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6092 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6093 {
6094 return GL_FALSE;
6095 }
6096 }
6097 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6098 {
6099 #ifdef USE_CF_FOR_CONTINUE_BREAK
6100 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6101 #else
6102 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6103 #endif
6104 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6105 {
6106 return GL_FALSE;
6107 }
6108 }
6109 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6110 {
6111 #ifdef USE_CF_FOR_CONTINUE_BREAK
6112 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6113 #else
6114 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6115 #endif
6116 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6117 {
6118 return GL_FALSE;
6119 }
6120 }
6121 else
6122 {
6123 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
6124 {
6125 return GL_FALSE;
6126 }
6127 }
6128 break;
6129
6130 //case OP_STP:
6131 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
6132 // return GL_FALSE;
6133 // break;
6134
6135 case OPCODE_SWZ:
6136 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
6137 {
6138 return GL_FALSE;
6139 }
6140 else
6141 {
6142 if( (i+1)<uiNumberInsts )
6143 {
6144 if(OPCODE_END != pILInst[i+1].Opcode)
6145 {
6146 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
6147 {
6148 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
6149 }
6150 }
6151 }
6152 }
6153 break;
6154
6155 case OPCODE_TEX:
6156 case OPCODE_TXB:
6157 case OPCODE_TXP:
6158 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
6159 return GL_FALSE;
6160 break;
6161
6162 case OPCODE_XPD:
6163 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
6164 return GL_FALSE;
6165 break;
6166
6167 case OPCODE_IF :
6168 {
6169 GLboolean bHasElse = GL_FALSE;
6170
6171 if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
6172 {
6173 bHasElse = GL_TRUE;
6174 }
6175
6176 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
6177 {
6178 return GL_FALSE;
6179 }
6180 }
6181 break;
6182
6183 case OPCODE_ELSE :
6184 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
6185 return GL_FALSE;
6186 break;
6187
6188 case OPCODE_ENDIF:
6189 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
6190 return GL_FALSE;
6191 break;
6192
6193 case OPCODE_BGNLOOP:
6194 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
6195 {
6196 return GL_FALSE;
6197 }
6198 break;
6199
6200 case OPCODE_BRK:
6201 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
6202 {
6203 return GL_FALSE;
6204 }
6205 break;
6206
6207 case OPCODE_CONT:
6208 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
6209 {
6210 return GL_FALSE;
6211 }
6212 break;
6213
6214 case OPCODE_ENDLOOP:
6215 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
6216 {
6217 return GL_FALSE;
6218 }
6219 break;
6220
6221 case OPCODE_BGNSUB:
6222 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
6223 {
6224 return GL_FALSE;
6225 }
6226 break;
6227
6228 case OPCODE_RET:
6229 if( GL_FALSE == assemble_RET(pR700AsmCode) )
6230 {
6231 return GL_FALSE;
6232 }
6233 break;
6234
6235 case OPCODE_CAL:
6236 if( GL_FALSE == assemble_CAL(pR700AsmCode,
6237 pILInst[i].BranchTarget,
6238 uiNumberInsts,
6239 pILInst) )
6240 {
6241 return GL_FALSE;
6242 }
6243 break;
6244
6245 //case OPCODE_EXPORT:
6246 // if ( GL_FALSE == assemble_EXPORT() )
6247 // return GL_FALSE;
6248 // break;
6249
6250 case OPCODE_ENDSUB:
6251 return assemble_ENDSUB(pR700AsmCode);
6252
6253 case OPCODE_END:
6254 //pR700AsmCode->uiCurInst = i;
6255 //This is a reminder: if a later export includes a depth/stencil
6256 //export, we need a MOV to rearrange the DST channel. Instead of adding a
6257 //pseudo instruction, this END instruction is reused to emit that MOV.
6258 return GL_TRUE;
6259
6260 default:
6261 radeon_error("internal: unknown instruction\n");
6262 return GL_FALSE;
6263 }
6264 }
6265
6266 return GL_TRUE;
6267 }
6268
6269 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
6270 {
6271 setRetInLoopFlag(pAsm, SQ_SEL_0);
6272 return GL_TRUE;
6273 }
6274
6275 GLboolean RelocProgram(r700_AssemblerBase * pAsm)
6276 {
6277 GLuint i;
6278 GLuint unCFoffset;
6279 TypedShaderList * plstCFmain;
6280 TypedShaderList * plstCFsub;
6281
6282 R700ShaderInstruction * pInst;
6283 R700ControlFlowGenericClause * pCFInst;
6284
6285 if(0 == pAsm->unSubArrayPointer)
6286 {
6287 return GL_TRUE;
6288 }
6289
6290 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
6291 unCFoffset = plstCFmain->uNumOfNode;
6292
6293 /* Reloc subs */
6294 for(i=0; i<pAsm->unSubArrayPointer; i++)
6295 {
6296 pAsm->subs[i].unCFoffset = unCFoffset;
6297 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
6298
6299 pInst = plstCFsub->pHead;
6300
6301 /* reloc instructions */
6302 while(pInst)
6303 {
6304 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
6305 {
6306 pCFInst = (R700ControlFlowGenericClause *)pInst;
6307
6308 switch (pCFInst->m_Word1.f.cf_inst)
6309 {
6310 case SQ_CF_INST_POP:
6311 case SQ_CF_INST_JUMP:
6312 case SQ_CF_INST_ELSE:
6313 case SQ_CF_INST_LOOP_END:
6314 case SQ_CF_INST_LOOP_START:
6315 case SQ_CF_INST_LOOP_START_NO_AL:
6316 case SQ_CF_INST_LOOP_CONTINUE:
6317 case SQ_CF_INST_LOOP_BREAK:
6318 pCFInst->m_Word0.f.addr += unCFoffset;
6319 break;
6320 default:
6321 break;
6322 }
6323 }
6324
6325 pInst->m_uIndex += unCFoffset;
6326
6327 pInst = pInst->pNextInst;
6328 };
6329
6330 /* Put sub into main */
6331 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
6332 plstCFmain->pTail = plstCFsub->pTail;
6333 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
6334
6335 unCFoffset += plstCFsub->uNumOfNode;
6336 }
6337
6338 /* reloc callers */
6339 for(i=0; i<pAsm->unCallerArrayPointer; i++)
6340 {
6341 pAsm->callers[i].cf_ptr->m_Word0.f.addr
6342 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
6343 }
6344
6345 /* remove flags init if they are not used */
6346 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
6347 {
6348 R700ControlFlowALUClause * pCF_ALU;
6349 pInst = plstCFmain->pHead;
6350 while(pInst)
6351 {
6352 if(SIT_CF_ALU == pInst->m_ShaderInstType)
6353 {
6354 pCF_ALU = (R700ControlFlowALUClause *)pInst;
6355 if(1 == pCF_ALU->m_Word1.f.count)
6356 {
6357 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
6358 }
6359 else
6360 {
6361 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
6362
6363 pALU->m_pLinkedALUClause = NULL;
6364 pALU = (R700ALUInstruction *)(pALU->pNextInst);
6365 pALU->m_pLinkedALUClause = pCF_ALU;
6366 pCF_ALU->m_pLinkedALUInstruction = pALU;
6367
6368 pCF_ALU->m_Word1.f.count--;
6369 }
6370 break;
6371 }
6372 pInst = pInst->pNextInst;
6373 };
6374 }
6375
6376 return GL_TRUE;
6377 }
6378
6379 GLboolean Process_Export(r700_AssemblerBase* pAsm,
6380 GLuint type,
6381 GLuint export_starting_index,
6382 GLuint export_count,
6383 GLuint starting_register_number,
6384 GLboolean is_depth_export)
6385 {
6386 unsigned char ucWriteMask;
6387
6388 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
6389 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
6390
6391 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
6392
6393 switch (type)
6394 {
6395 case SQ_EXPORT_PIXEL:
6396 if(GL_TRUE == is_depth_export)
6397 {
6398 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
6399 }
6400 else
6401 {
6402 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
6403 }
6404 break;
6405
6406 case SQ_EXPORT_POS:
6407 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
6408 break;
6409
6410 case SQ_EXPORT_PARAM:
6411 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
6412 break;
6413
6414 default:
6415 radeon_error("Unknown export type: %d\n", type);
6416 return GL_FALSE;
6417 break;
6418 }
6419
6420 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
6421
6422 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
6423 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
6424 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
6425
6426 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
6427 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
6428 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6429 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
6430 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6431 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
6432
6433 if (export_count == 1)
6434 {
6435 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
6436 /* exports Z as a float into Red channel */
6437 if (GL_TRUE == is_depth_export)
6438 ucWriteMask = 0x1;
6439
6440 if( (ucWriteMask & 0x1) != 0)
6441 {
6442 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6443 }
6444 else
6445 {
6446 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
6447 }
6448 if( ((ucWriteMask>>1) & 0x1) != 0)
6449 {
6450 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6451 }
6452 else
6453 {
6454 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
6455 }
6456 if( ((ucWriteMask>>2) & 0x1) != 0)
6457 {
6458 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6459 }
6460 else
6461 {
6462 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
6463 }
6464 if( ((ucWriteMask>>3) & 0x1) != 0)
6465 {
6466 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6467 }
6468 else
6469 {
6470 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
6471 }
6472 }
6473 else
6474 {
6475 // This should only be used if all components for all registers have been written
6476 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6477 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6478 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6479 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6480 }
6481
6482 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
6483
6484 return GL_TRUE;
6485 }
6486
6487 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
6488 {
6489 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
6490 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
6491
6492 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6493
6494 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6495
6496 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6497 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6498 pAsm->D.dst.reg = pAsm->depth_export_register_number;
6499
6500 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
6501
6502 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6503 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6504 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
6505
6506 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
6507
6508 noneg_PVSSRC(&(pAsm->S[0].src));
6509
6510 if( GL_FALSE == next_ins(pAsm) )
6511 {
6512 return GL_FALSE;
6513 }
6514
6515 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
6516
6517 return GL_TRUE;
6518 }
6519
6520 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
6521 GLbitfield OutputsWritten)
6522 {
6523 unsigned int unBit;
6524 GLuint export_count = 0;
6525
6526 if(pR700AsmCode->depth_export_register_number >= 0)
6527 {
6528 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
6529 {
6530 return GL_FALSE;
6531 }
6532 }
6533
6534 unBit = 1 << FRAG_RESULT_COLOR;
6535 if(OutputsWritten & unBit)
6536 {
6537 if( GL_FALSE == Process_Export(pR700AsmCode,
6538 SQ_EXPORT_PIXEL,
6539 0,
6540 1,
6541 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
6542 GL_FALSE) )
6543 {
6544 return GL_FALSE;
6545 }
6546 export_count++;
6547 }
6548 unBit = 1 << FRAG_RESULT_DEPTH;
6549 if(OutputsWritten & unBit)
6550 {
6551 if( GL_FALSE == Process_Export(pR700AsmCode,
6552 SQ_EXPORT_PIXEL,
6553 0,
6554 1,
6555 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
6556 GL_TRUE))
6557 {
6558 return GL_FALSE;
6559 }
6560 export_count++;
6561 }
6562 /* Need to export something, otherwise we'll hang
6563 * results are undefined anyway */
6564 if(export_count == 0)
6565 {
6566 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
6567 }
6568
6569 if(pR700AsmCode->cf_last_export_ptr != NULL)
6570 {
6571 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6572 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6573 }
6574
6575 return GL_TRUE;
6576 }
6577
6578 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6579 GLbitfield OutputsWritten)
6580 {
6581 unsigned int unBit;
6582 unsigned int i;
6583
6584 GLuint export_starting_index = 0;
6585 GLuint export_count = pR700AsmCode->number_of_exports;
6586
6587 unBit = 1 << VERT_RESULT_HPOS;
6588 if(OutputsWritten & unBit)
6589 {
6590 if( GL_FALSE == Process_Export(pR700AsmCode,
6591 SQ_EXPORT_POS,
6592 export_starting_index,
6593 1,
6594 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6595 GL_FALSE) )
6596 {
6597 return GL_FALSE;
6598 }
6599
6600 export_count--;
6601
6602 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6603 }
6604
6605 pR700AsmCode->number_of_exports = export_count;
6606
6607 unBit = 1 << VERT_RESULT_COL0;
6608 if(OutputsWritten & unBit)
6609 {
6610 if( GL_FALSE == Process_Export(pR700AsmCode,
6611 SQ_EXPORT_PARAM,
6612 export_starting_index,
6613 1,
6614 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6615 GL_FALSE) )
6616 {
6617 return GL_FALSE;
6618 }
6619
6620 export_starting_index++;
6621 }
6622
6623 unBit = 1 << VERT_RESULT_COL1;
6624 if(OutputsWritten & unBit)
6625 {
6626 if( GL_FALSE == Process_Export(pR700AsmCode,
6627 SQ_EXPORT_PARAM,
6628 export_starting_index,
6629 1,
6630 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6631 GL_FALSE) )
6632 {
6633 return GL_FALSE;
6634 }
6635
6636 export_starting_index++;
6637 }
6638
6639 unBit = 1 << VERT_RESULT_FOGC;
6640 if(OutputsWritten & unBit)
6641 {
6642 if( GL_FALSE == Process_Export(pR700AsmCode,
6643 SQ_EXPORT_PARAM,
6644 export_starting_index,
6645 1,
6646 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6647 GL_FALSE) )
6648 {
6649 return GL_FALSE;
6650 }
6651
6652 export_starting_index++;
6653 }
6654
6655 for(i=0; i<8; i++)
6656 {
6657 unBit = 1 << (VERT_RESULT_TEX0 + i);
6658 if(OutputsWritten & unBit)
6659 {
6660 if( GL_FALSE == Process_Export(pR700AsmCode,
6661 SQ_EXPORT_PARAM,
6662 export_starting_index,
6663 1,
6664 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6665 GL_FALSE) )
6666 {
6667 return GL_FALSE;
6668 }
6669
6670 export_starting_index++;
6671 }
6672 }
6673
6674 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6675 {
6676 unBit = 1 << i;
6677 if(OutputsWritten & unBit)
6678 {
6679 if( GL_FALSE == Process_Export(pR700AsmCode,
6680 SQ_EXPORT_PARAM,
6681 export_starting_index,
6682 1,
6683 pR700AsmCode->ucVP_OutputMap[i],
6684 GL_FALSE) )
6685 {
6686 return GL_FALSE;
6687 }
6688
6689 export_starting_index++;
6690 }
6691 }
6692
6693 // At least one param should be exported
6694 if (export_count)
6695 {
6696 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6697 }
6698 else
6699 {
6700 if( GL_FALSE == Process_Export(pR700AsmCode,
6701 SQ_EXPORT_PARAM,
6702 0,
6703 1,
6704 pR700AsmCode->starting_export_register_number,
6705 GL_FALSE) )
6706 {
6707 return GL_FALSE;
6708 }
6709
6710 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6711 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6712 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6713 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6714 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6715 }
6716
6717 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6718
6719 return GL_TRUE;
6720 }
6721
6722 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6723 {
6724 FREE(pR700AsmCode->pucOutMask);
6725 FREE(pR700AsmCode->pInstDeps);
6726
6727 if(NULL != pR700AsmCode->subs)
6728 {
6729 FREE(pR700AsmCode->subs);
6730 }
6731 if(NULL != pR700AsmCode->callers)
6732 {
6733 FREE(pR700AsmCode->callers);
6734 }
6735
6736 return GL_TRUE;
6737 }
6738