r600 : change shader pop method for now.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 #define USE_CF_FOR_CONTINUE_BREAK 1
42 //#define USE_CF_FOR_POP_AFTER 1
43
44 BITS addrmode_PVSDST(PVSDST * pPVSDST)
45 {
46 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
47 }
48
49 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
50 {
51 pPVSDST->addrmode0 = addrmode & 1;
52 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
53 }
54
55 void nomask_PVSDST(PVSDST * pPVSDST)
56 {
57 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
58 }
59
60 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
61 {
62 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
63 }
64
65 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
66 {
67 pPVSSRC->addrmode0 = addrmode & 1;
68 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
69 }
70
71
72 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
73 {
74 pPVSSRC->swizzlex =
75 pPVSSRC->swizzley =
76 pPVSSRC->swizzlez =
77 pPVSSRC->swizzlew = swz;
78 }
79
80 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
81 {
82 pPVSSRC->swizzlex = SQ_SEL_X;
83 pPVSSRC->swizzley = SQ_SEL_Y;
84 pPVSSRC->swizzlez = SQ_SEL_Z;
85 pPVSSRC->swizzlew = SQ_SEL_W;
86 }
87
88 void
89 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
90 {
91 switch (x)
92 {
93 case SQ_SEL_X: x = pPVSSRC->swizzlex;
94 break;
95 case SQ_SEL_Y: x = pPVSSRC->swizzley;
96 break;
97 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
98 break;
99 case SQ_SEL_W: x = pPVSSRC->swizzlew;
100 break;
101 default:;
102 }
103
104 switch (y)
105 {
106 case SQ_SEL_X: y = pPVSSRC->swizzlex;
107 break;
108 case SQ_SEL_Y: y = pPVSSRC->swizzley;
109 break;
110 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
111 break;
112 case SQ_SEL_W: y = pPVSSRC->swizzlew;
113 break;
114 default:;
115 }
116
117 switch (z)
118 {
119 case SQ_SEL_X: z = pPVSSRC->swizzlex;
120 break;
121 case SQ_SEL_Y: z = pPVSSRC->swizzley;
122 break;
123 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
124 break;
125 case SQ_SEL_W: z = pPVSSRC->swizzlew;
126 break;
127 default:;
128 }
129
130 switch (w)
131 {
132 case SQ_SEL_X: w = pPVSSRC->swizzlex;
133 break;
134 case SQ_SEL_Y: w = pPVSSRC->swizzley;
135 break;
136 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
137 break;
138 case SQ_SEL_W: w = pPVSSRC->swizzlew;
139 break;
140 default:;
141 }
142
143 pPVSSRC->swizzlex = x;
144 pPVSSRC->swizzley = y;
145 pPVSSRC->swizzlez = z;
146 pPVSSRC->swizzlew = w;
147 }
148
149 void neg_PVSSRC(PVSSRC* pPVSSRC)
150 {
151 pPVSSRC->negx = 1;
152 pPVSSRC->negy = 1;
153 pPVSSRC->negz = 1;
154 pPVSSRC->negw = 1;
155 }
156
157 void noneg_PVSSRC(PVSSRC* pPVSSRC)
158 {
159 pPVSSRC->negx = 0;
160 pPVSSRC->negy = 0;
161 pPVSSRC->negz = 0;
162 pPVSSRC->negw = 0;
163 }
164
165 // negate argument (for SUB instead of ADD and alike)
166 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
167 {
168 pPVSSRC->negx = !pPVSSRC->negx;
169 pPVSSRC->negy = !pPVSSRC->negy;
170 pPVSSRC->negz = !pPVSSRC->negz;
171 pPVSSRC->negw = !pPVSSRC->negw;
172 }
173
174 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
175 {
176 switch (c)
177 {
178 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
179 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
180 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
181 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
182 default:;
183 }
184 }
185
186 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
187 {
188 switch (c)
189 {
190 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
191 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
192 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
193 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
194 default:;
195 }
196 }
197
198 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
199 {
200 return (pOutVTXFmt0->point_size |
201 pOutVTXFmt0->edge_flag |
202 pOutVTXFmt0->rta_index |
203 pOutVTXFmt0->kill_flag |
204 pOutVTXFmt0->viewport_index);
205 }
206
207 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
208 {
209 return (pFPOutFmt->depth |
210 pFPOutFmt->stencil_ref |
211 pFPOutFmt->mask |
212 pFPOutFmt->coverage_to_mask);
213 }
214
215 GLboolean is_reduction_opcode(PVSDWORD* dest)
216 {
217 if (dest->dst.op3 == 0)
218 {
219 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
220 {
221 return GL_TRUE;
222 }
223 }
224 return GL_FALSE;
225 }
226
227 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
228 {
229 GLuint format = FMT_INVALID;
230 GLuint uiElemSize = 0;
231
232 switch (eType)
233 {
234 case GL_BYTE:
235 case GL_UNSIGNED_BYTE:
236 uiElemSize = 1;
237 switch(nChannels)
238 {
239 case 1:
240 format = FMT_8; break;
241 case 2:
242 format = FMT_8_8; break;
243 case 3:
244 format = FMT_8_8_8; break;
245 case 4:
246 format = FMT_8_8_8_8; break;
247 default:
248 break;
249 }
250 break;
251
252 case GL_UNSIGNED_SHORT:
253 case GL_SHORT:
254 uiElemSize = 2;
255 switch(nChannels)
256 {
257 case 1:
258 format = FMT_16; break;
259 case 2:
260 format = FMT_16_16; break;
261 case 3:
262 format = FMT_16_16_16; break;
263 case 4:
264 format = FMT_16_16_16_16; break;
265 default:
266 break;
267 }
268 break;
269
270 case GL_UNSIGNED_INT:
271 case GL_INT:
272 uiElemSize = 4;
273 switch(nChannels)
274 {
275 case 1:
276 format = FMT_32; break;
277 case 2:
278 format = FMT_32_32; break;
279 case 3:
280 format = FMT_32_32_32; break;
281 case 4:
282 format = FMT_32_32_32_32; break;
283 default:
284 break;
285 }
286 break;
287
288 case GL_FLOAT:
289 uiElemSize = 4;
290 switch(nChannels)
291 {
292 case 1:
293 format = FMT_32_FLOAT; break;
294 case 2:
295 format = FMT_32_32_FLOAT; break;
296 case 3:
297 format = FMT_32_32_32_FLOAT; break;
298 case 4:
299 format = FMT_32_32_32_32_FLOAT; break;
300 default:
301 break;
302 }
303 break;
304 case GL_DOUBLE:
305 uiElemSize = 8;
306 switch(nChannels)
307 {
308 case 1:
309 format = FMT_32_FLOAT; break;
310 case 2:
311 format = FMT_32_32_FLOAT; break;
312 case 3:
313 format = FMT_32_32_32_FLOAT; break;
314 case 4:
315 format = FMT_32_32_32_32_FLOAT; break;
316 default:
317 break;
318 }
319 break;
320 default:
321 ;
322 //GL_ASSERT_NO_CASE();
323 }
324
325 if(NULL != pClient_size)
326 {
327 *pClient_size = uiElemSize * nChannels;
328 }
329
330 return(format);
331 }
332
333 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
334 {
335 if(pAsm->D.dst.op3)
336 {
337 return 3;
338 }
339
340 switch (pAsm->D.dst.opcode)
341 {
342 case SQ_OP2_INST_ADD:
343 case SQ_OP2_INST_KILLGT:
344 case SQ_OP2_INST_MUL:
345 case SQ_OP2_INST_MAX:
346 case SQ_OP2_INST_MIN:
347 //case SQ_OP2_INST_MAX_DX10:
348 //case SQ_OP2_INST_MIN_DX10:
349 case SQ_OP2_INST_SETE:
350 case SQ_OP2_INST_SETNE:
351 case SQ_OP2_INST_SETGT:
352 case SQ_OP2_INST_SETGE:
353 case SQ_OP2_INST_PRED_SETE:
354 case SQ_OP2_INST_PRED_SETGT:
355 case SQ_OP2_INST_PRED_SETGE:
356 case SQ_OP2_INST_PRED_SETNE:
357 case SQ_OP2_INST_DOT4:
358 case SQ_OP2_INST_DOT4_IEEE:
359 case SQ_OP2_INST_CUBE:
360 return 2;
361
362 case SQ_OP2_INST_MOV:
363 case SQ_OP2_INST_MOVA_FLOOR:
364 case SQ_OP2_INST_FRACT:
365 case SQ_OP2_INST_FLOOR:
366 case SQ_OP2_INST_EXP_IEEE:
367 case SQ_OP2_INST_LOG_CLAMPED:
368 case SQ_OP2_INST_LOG_IEEE:
369 case SQ_OP2_INST_RECIP_IEEE:
370 case SQ_OP2_INST_RECIPSQRT_IEEE:
371 case SQ_OP2_INST_FLT_TO_INT:
372 case SQ_OP2_INST_SIN:
373 case SQ_OP2_INST_COS:
374 return 1;
375
376 default: radeon_error(
377 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
378 };
379
380 return 3;
381 }
382
383 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
384 {
385 GLuint i;
386
387 Init_R700_Shader(pShader);
388 pAsm->pR700Shader = pShader;
389 pAsm->currentShaderType = spt;
390
391 pAsm->cf_last_export_ptr = NULL;
392
393 pAsm->cf_current_export_clause_ptr = NULL;
394 pAsm->cf_current_alu_clause_ptr = NULL;
395 pAsm->cf_current_tex_clause_ptr = NULL;
396 pAsm->cf_current_vtx_clause_ptr = NULL;
397 pAsm->cf_current_cf_clause_ptr = NULL;
398
399 // No clause has been created yet
400 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
401
402 pAsm->number_of_colorandz_exports = 0;
403 pAsm->number_of_exports = 0;
404 pAsm->number_of_export_opcodes = 0;
405
406 pAsm->alu_x_opcode = 0;
407
408 pAsm->D2.bits = 0;
409
410 pAsm->D.bits = 0;
411 pAsm->S[0].bits = 0;
412 pAsm->S[1].bits = 0;
413 pAsm->S[2].bits = 0;
414
415 pAsm->uLastPosUpdate = 0;
416
417 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
418
419 pAsm->uIIns = 0;
420 pAsm->uOIns = 0;
421 pAsm->number_used_registers = 0;
422 pAsm->uUsedConsts = 256;
423
424
425 // Fragment programs
426 pAsm->uBoolConsts = 0;
427 pAsm->uIntConsts = 0;
428 pAsm->uInsts = 0;
429 pAsm->uConsts = 0;
430
431 pAsm->FCSP = 0;
432 pAsm->fc_stack[0].type = FC_NONE;
433
434 pAsm->branch_depth = 0;
435 pAsm->max_branch_depth = 0;
436
437 pAsm->aArgSubst[0] =
438 pAsm->aArgSubst[1] =
439 pAsm->aArgSubst[2] =
440 pAsm->aArgSubst[3] = (-1);
441
442 pAsm->uOutputs = 0;
443
444 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
445 {
446 pAsm->color_export_register_number[i] = (-1);
447 }
448
449
450 pAsm->depth_export_register_number = (-1);
451 pAsm->stencil_export_register_number = (-1);
452 pAsm->coverage_to_mask_export_register_number = (-1);
453 pAsm->mask_export_register_number = (-1);
454
455 pAsm->starting_export_register_number = 0;
456 pAsm->starting_vfetch_register_number = 0;
457 pAsm->starting_temp_register_number = 0;
458 pAsm->uFirstHelpReg = 0;
459
460
461 pAsm->input_position_is_used = GL_FALSE;
462 pAsm->input_normal_is_used = GL_FALSE;
463
464
465 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
466 {
467 pAsm->input_color_is_used[ i ] = GL_FALSE;
468 }
469
470 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
471 {
472 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
473 }
474
475 for (i=0; i<VERT_ATTRIB_MAX; i++)
476 {
477 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
478 }
479
480 pAsm->number_of_inputs = 0;
481
482 pAsm->is_tex = GL_FALSE;
483 pAsm->need_tex_barrier = GL_FALSE;
484
485 pAsm->subs = NULL;
486 pAsm->unSubArraySize = 0;
487 pAsm->unSubArrayPointer = 0;
488 pAsm->callers = NULL;
489 pAsm->unCallerArraySize = 0;
490 pAsm->unCallerArrayPointer = 0;
491
492 pAsm->CALLSP = 0;
493 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
494 pAsm->CALLSTACK[0].plstCFInstructions_local
495 = &(pAsm->pR700Shader->lstCFInstructions);
496
497 pAsm->CALLSTACK[0].stackUsage.bits = 0;
498
499 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
500
501 pAsm->unCFflags = 0;
502
503 return 0;
504 }
505
506 GLboolean IsTex(gl_inst_opcode Opcode)
507 {
508 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
509 {
510 return GL_TRUE;
511 }
512 return GL_FALSE;
513 }
514
515 GLboolean IsAlu(gl_inst_opcode Opcode)
516 {
517 //TODO : more for fc and ex for higher spec.
518 if( IsTex(Opcode) )
519 {
520 return GL_FALSE;
521 }
522 return GL_TRUE;
523 }
524
525 int check_current_clause(r700_AssemblerBase* pAsm,
526 CF_CLAUSE_TYPE new_clause_type)
527 {
528 if (pAsm->cf_current_clause_type != new_clause_type)
529 { //Close last open clause
530 switch (pAsm->cf_current_clause_type)
531 {
532 case CF_ALU_CLAUSE:
533 if ( pAsm->cf_current_alu_clause_ptr != NULL)
534 {
535 pAsm->cf_current_alu_clause_ptr = NULL;
536 }
537 break;
538 case CF_VTX_CLAUSE:
539 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
540 {
541 pAsm->cf_current_vtx_clause_ptr = NULL;
542 }
543 break;
544 case CF_TEX_CLAUSE:
545 if ( pAsm->cf_current_tex_clause_ptr != NULL)
546 {
547 pAsm->cf_current_tex_clause_ptr = NULL;
548 }
549 break;
550 case CF_EXPORT_CLAUSE:
551 if ( pAsm->cf_current_export_clause_ptr != NULL)
552 {
553 pAsm->cf_current_export_clause_ptr = NULL;
554 }
555 break;
556 case CF_OTHER_CLAUSE:
557 if ( pAsm->cf_current_cf_clause_ptr != NULL)
558 {
559 pAsm->cf_current_cf_clause_ptr = NULL;
560 }
561 break;
562 case CF_EMPTY_CLAUSE:
563 break;
564 default:
565 radeon_error(
566 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
567 return GL_FALSE;
568 }
569
570 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
571
572 // Create new clause
573 switch (new_clause_type)
574 {
575 case CF_ALU_CLAUSE:
576 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
577 break;
578 case CF_VTX_CLAUSE:
579 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
580 break;
581 case CF_TEX_CLAUSE:
582 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
583 break;
584 case CF_EXPORT_CLAUSE:
585 {
586 R700ControlFlowSXClause* pR700ControlFlowSXClause
587 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
588
589 // Add new export instruction to control flow program
590 if (pR700ControlFlowSXClause != 0)
591 {
592 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
593 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
594 AddCFInstruction( pAsm->pR700Shader,
595 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
596 }
597 else
598 {
599 radeon_error(
600 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
601 return GL_FALSE;
602 }
603 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
604 }
605 break;
606 case CF_EMPTY_CLAUSE:
607 break;
608 case CF_OTHER_CLAUSE:
609 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
610 break;
611 default:
612 radeon_error(
613 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
614 return GL_FALSE;
615 }
616 }
617
618 return GL_TRUE;
619 }
620
621 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
622 {
623 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
624 {
625 return GL_FALSE;
626 }
627
628 pAsm->cf_current_cf_clause_ptr =
629 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
630
631 if (pAsm->cf_current_cf_clause_ptr != NULL)
632 {
633 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
634 AddCFInstruction( pAsm->pR700Shader,
635 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
636 }
637 else
638 {
639 radeon_error("Could not allocate a new VFetch CF instruction.\n");
640 return GL_FALSE;
641 }
642
643 return GL_TRUE;
644 }
645
646 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
647 R700VertexInstruction* vertex_instruction_ptr)
648 {
649 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
650 {
651 return GL_FALSE;
652 }
653
654 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
655 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
656 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
657 ) )
658 {
659 // Create new Vfetch control flow instruction for this new clause
660 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
661
662 if (pAsm->cf_current_vtx_clause_ptr != NULL)
663 {
664 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
665 AddCFInstruction( pAsm->pR700Shader,
666 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
667 }
668 else
669 {
670 radeon_error("Could not allocate a new VFetch CF instruction.\n");
671 return GL_FALSE;
672 }
673
674 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
675 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
676 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
677 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
678 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
679 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
680 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
681 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
682 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
683
684 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
685 }
686 else
687 {
688 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
689 }
690
691 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
692
693 return GL_TRUE;
694 }
695
696 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
697 R700TextureInstruction* tex_instruction_ptr)
698 {
699 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
700 {
701 return GL_FALSE;
702 }
703
704 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
705 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
706 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
707 ) )
708 {
709 // new tex cf instruction for this new clause
710 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
711
712 if (pAsm->cf_current_tex_clause_ptr != NULL)
713 {
714 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
715 AddCFInstruction( pAsm->pR700Shader,
716 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
717 }
718 else
719 {
720 radeon_error("Could not allocate a new TEX CF instruction.\n");
721 return GL_FALSE;
722 }
723
724 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
725 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
726 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
727
728 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
729 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
730 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
731 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
732 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
733 }
734 else
735 {
736 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
737 }
738
739 // If this clause constains any TEX instruction that is dependent on a previous instruction,
740 // set the barrier bit
741 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
742 {
743 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
744 }
745
746 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
747 {
748 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
749 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
750 }
751
752 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
753
754 return GL_TRUE;
755 }
756
757 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
758 GLuint gl_client_id,
759 GLuint destination_register,
760 GLuint number_of_elements,
761 GLenum dataElementType,
762 VTX_FETCH_METHOD* pFetchMethod)
763 {
764 GLuint client_size_inbyte;
765 GLuint data_format;
766 GLuint mega_fetch_count;
767 GLuint is_mega_fetch_flag;
768
769 R700VertexGenericFetch* vfetch_instruction_ptr;
770 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
771
772 if (assembled_vfetch_instruction_ptr == NULL)
773 {
774 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
775 if (vfetch_instruction_ptr == NULL)
776 {
777 return GL_FALSE;
778 }
779 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
780 }
781 else
782 {
783 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
784 }
785
786 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
787
788 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
789 {
790 //TODO : mini fetch
791 }
792 else
793 {
794 mega_fetch_count = MEGA_FETCH_BYTES - 1;
795 is_mega_fetch_flag = 0x1;
796 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
797 }
798
799 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
800 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
801 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
802
803 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
804 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
805 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
806 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
807 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
808
809 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
810 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
811 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
812 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
813
814 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
815
816 // Destination register
817 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
818 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
819
820 vfetch_instruction_ptr->m_Word2.f.offset = 0;
821 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
822
823 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
824
825 if (assembled_vfetch_instruction_ptr == NULL)
826 {
827 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
828 {
829 return GL_FALSE;
830 }
831
832 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
833 {
834 return GL_FALSE;
835 }
836 else
837 {
838 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
839 }
840 }
841
842 return GL_TRUE;
843 }
844
845 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
846 GLuint destination_register,
847 GLenum type,
848 GLint size,
849 GLubyte element,
850 GLuint _signed,
851 GLboolean normalize,
852 VTX_FETCH_METHOD * pFetchMethod)
853 {
854 GLuint client_size_inbyte;
855 GLuint data_format;
856 GLuint mega_fetch_count;
857 GLuint is_mega_fetch_flag;
858
859 R700VertexGenericFetch* vfetch_instruction_ptr;
860 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
861 = pAsm->vfetch_instruction_ptr_array[element];
862
863 if (assembled_vfetch_instruction_ptr == NULL)
864 {
865 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
866 if (vfetch_instruction_ptr == NULL)
867 {
868 return GL_FALSE;
869 }
870 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
871 }
872 else
873 {
874 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
875 }
876
877 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
878
879 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
880 {
881 //TODO : mini fetch
882 }
883 else
884 {
885 mega_fetch_count = MEGA_FETCH_BYTES - 1;
886 is_mega_fetch_flag = 0x1;
887 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
888 }
889
890 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
891 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
892 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
893
894 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
895 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
896 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
897 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
898 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
899
900 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
901 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
902 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
903 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
904
905 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
906 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
907 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
908
909 if(1 == _signed)
910 {
911 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
912 }
913 else
914 {
915 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
916 }
917
918 if(GL_TRUE == normalize)
919 {
920 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
921 }
922 else
923 {
924 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
925 }
926
927 // Destination register
928 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
929 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
930
931 vfetch_instruction_ptr->m_Word2.f.offset = 0;
932 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
933
934 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
935
936 if (assembled_vfetch_instruction_ptr == NULL)
937 {
938 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
939 {
940 return GL_FALSE;
941 }
942
943 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
944 {
945 return GL_FALSE;
946 }
947 else
948 {
949 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
950 }
951 }
952
953 return GL_TRUE;
954 }
955
956 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
957 {
958 GLint i;
959 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
960 pAsm->cf_current_vtx_clause_ptr = NULL;
961
962 for (i=0; i<VERT_ATTRIB_MAX; i++)
963 {
964 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
965 }
966
967 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
968
969 return GL_TRUE;
970 }
971
972 GLuint gethelpr(r700_AssemblerBase* pAsm)
973 {
974 GLuint r = pAsm->uHelpReg;
975 pAsm->uHelpReg++;
976 if (pAsm->uHelpReg > pAsm->number_used_registers)
977 {
978 pAsm->number_used_registers = pAsm->uHelpReg;
979 }
980 return r;
981 }
982 void resethelpr(r700_AssemblerBase* pAsm)
983 {
984 pAsm->uHelpReg = pAsm->uFirstHelpReg;
985 }
986
987 void checkop_init(r700_AssemblerBase* pAsm)
988 {
989 resethelpr(pAsm);
990 pAsm->aArgSubst[0] =
991 pAsm->aArgSubst[1] =
992 pAsm->aArgSubst[2] =
993 pAsm->aArgSubst[3] = -1;
994 }
995
996 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
997 {
998 GLuint tmp = gethelpr(pAsm);
999
1000 //mov src to temp helper gpr.
1001 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1002
1003 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1004
1005 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1006 pAsm->D.dst.reg = tmp;
1007
1008 nomask_PVSDST(&(pAsm->D.dst));
1009
1010 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1011 {
1012 return GL_FALSE;
1013 }
1014
1015 noswizzle_PVSSRC(&(pAsm->S[0].src));
1016 noneg_PVSSRC(&(pAsm->S[0].src));
1017
1018 if( GL_FALSE == next_ins(pAsm) )
1019 {
1020 return GL_FALSE;
1021 }
1022
1023 pAsm->aArgSubst[1 + src] = tmp;
1024
1025 return GL_TRUE;
1026 }
1027
1028 GLboolean checkop1(r700_AssemblerBase* pAsm)
1029 {
1030 checkop_init(pAsm);
1031 return GL_TRUE;
1032 }
1033
1034 GLboolean checkop2(r700_AssemblerBase* pAsm)
1035 {
1036 GLboolean bSrcConst[2];
1037 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1038
1039 checkop_init(pAsm);
1040
1041 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1042 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1043 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1044 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1045 {
1046 bSrcConst[0] = GL_TRUE;
1047 }
1048 else
1049 {
1050 bSrcConst[0] = GL_FALSE;
1051 }
1052 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1053 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1054 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1055 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1056 {
1057 bSrcConst[1] = GL_TRUE;
1058 }
1059 else
1060 {
1061 bSrcConst[1] = GL_FALSE;
1062 }
1063
1064 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1065 {
1066 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1067 {
1068 if( GL_FALSE == mov_temp(pAsm, 1) )
1069 {
1070 return GL_FALSE;
1071 }
1072 }
1073 }
1074
1075 return GL_TRUE;
1076 }
1077
1078 GLboolean checkop3(r700_AssemblerBase* pAsm)
1079 {
1080 GLboolean bSrcConst[3];
1081 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1082
1083 checkop_init(pAsm);
1084
1085 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1086 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1087 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1088 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1089 {
1090 bSrcConst[0] = GL_TRUE;
1091 }
1092 else
1093 {
1094 bSrcConst[0] = GL_FALSE;
1095 }
1096 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1097 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1098 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1099 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1100 {
1101 bSrcConst[1] = GL_TRUE;
1102 }
1103 else
1104 {
1105 bSrcConst[1] = GL_FALSE;
1106 }
1107 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1108 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1109 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1110 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1111 {
1112 bSrcConst[2] = GL_TRUE;
1113 }
1114 else
1115 {
1116 bSrcConst[2] = GL_FALSE;
1117 }
1118
1119 if( (GL_TRUE == bSrcConst[0]) &&
1120 (GL_TRUE == bSrcConst[1]) &&
1121 (GL_TRUE == bSrcConst[2]) )
1122 {
1123 if( GL_FALSE == mov_temp(pAsm, 1) )
1124 {
1125 return GL_FALSE;
1126 }
1127 if( GL_FALSE == mov_temp(pAsm, 2) )
1128 {
1129 return GL_FALSE;
1130 }
1131
1132 return GL_TRUE;
1133 }
1134 else if( (GL_TRUE == bSrcConst[0]) &&
1135 (GL_TRUE == bSrcConst[1]) )
1136 {
1137 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1138 {
1139 if( GL_FALSE == mov_temp(pAsm, 1) )
1140 {
1141 return 1;
1142 }
1143 }
1144
1145 return GL_TRUE;
1146 }
1147 else if ( (GL_TRUE == bSrcConst[0]) &&
1148 (GL_TRUE == bSrcConst[2]) )
1149 {
1150 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1151 {
1152 if( GL_FALSE == mov_temp(pAsm, 2) )
1153 {
1154 return GL_FALSE;
1155 }
1156 }
1157
1158 return GL_TRUE;
1159 }
1160 else if( (GL_TRUE == bSrcConst[1]) &&
1161 (GL_TRUE == bSrcConst[2]) )
1162 {
1163 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1164 {
1165 if( GL_FALSE == mov_temp(pAsm, 2) )
1166 {
1167 return GL_FALSE;
1168 }
1169 }
1170
1171 return GL_TRUE;
1172 }
1173
1174 return GL_TRUE;
1175 }
1176
1177 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1178 int src,
1179 int fld)
1180 {
1181 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1182
1183 if (fld == -1)
1184 {
1185 fld = src;
1186 }
1187
1188 if(pAsm->aArgSubst[1+src] >= 0)
1189 {
1190 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1191 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1192 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1193 }
1194 else
1195 {
1196 switch (pILInst->SrcReg[src].File)
1197 {
1198 case PROGRAM_TEMPORARY:
1199 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1200 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1201 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1202 break;
1203 case PROGRAM_CONSTANT:
1204 case PROGRAM_LOCAL_PARAM:
1205 case PROGRAM_ENV_PARAM:
1206 case PROGRAM_STATE_VAR:
1207 case PROGRAM_UNIFORM:
1208 if (1 == pILInst->SrcReg[src].RelAddr)
1209 {
1210 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1211 }
1212 else
1213 {
1214 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1215 }
1216
1217 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1218 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1219 break;
1220 case PROGRAM_INPUT:
1221 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1222 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1223 switch (pAsm->currentShaderType)
1224 {
1225 case SPT_FP:
1226 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1227 break;
1228 case SPT_VP:
1229 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1230 break;
1231 }
1232 break;
1233 default:
1234 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1235 return GL_FALSE;
1236 }
1237 }
1238
1239 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1240 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1241 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1242 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1243
1244 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1245 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1246 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1247 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1248
1249 return GL_TRUE;
1250 }
1251
1252 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1253 {
1254 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1255 switch (pILInst->DstReg.File)
1256 {
1257 case PROGRAM_TEMPORARY:
1258 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1259 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1260 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1261 break;
1262 case PROGRAM_ADDRESS:
1263 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1264 pAsm->D.dst.rtype = DST_REG_A0;
1265 pAsm->D.dst.reg = 0;
1266 break;
1267 case PROGRAM_OUTPUT:
1268 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1269 pAsm->D.dst.rtype = DST_REG_OUT;
1270 switch (pAsm->currentShaderType)
1271 {
1272 case SPT_FP:
1273 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1274 break;
1275 case SPT_VP:
1276 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1277 break;
1278 }
1279 break;
1280 default:
1281 radeon_error("Invalid destination output argument type\n");
1282 return GL_FALSE;
1283 }
1284
1285 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1286 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1287 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1288 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1289
1290 return GL_TRUE;
1291 }
1292
1293 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1294 {
1295 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1296
1297 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1298 {
1299 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1300 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1301
1302 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1303 }
1304 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1305 {
1306 pAsm->D.dst.rtype = DST_REG_OUT;
1307 switch (pAsm->currentShaderType)
1308 {
1309 case SPT_FP:
1310 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1311 break;
1312 case SPT_VP:
1313 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1314 break;
1315 }
1316
1317 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1318 }
1319 else
1320 {
1321 radeon_error("Invalid destination output argument type\n");
1322 return GL_FALSE;
1323 }
1324
1325 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1326 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1327 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1328 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1329
1330 return GL_TRUE;
1331 }
1332
1333 GLboolean tex_src(r700_AssemblerBase *pAsm)
1334 {
1335 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1336
1337 GLboolean bValidTexCoord = GL_FALSE;
1338
1339 if(pAsm->aArgSubst[1] >= 0)
1340 {
1341 bValidTexCoord = GL_TRUE;
1342 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1343 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1344 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1345 }
1346 else
1347 {
1348 switch (pILInst->SrcReg[0].File) {
1349 case PROGRAM_CONSTANT:
1350 case PROGRAM_LOCAL_PARAM:
1351 case PROGRAM_ENV_PARAM:
1352 case PROGRAM_STATE_VAR:
1353 break;
1354 case PROGRAM_TEMPORARY:
1355 bValidTexCoord = GL_TRUE;
1356 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1357 pAsm->starting_temp_register_number;
1358 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1359 break;
1360 case PROGRAM_INPUT:
1361 switch (pILInst->SrcReg[0].Index)
1362 {
1363 case FRAG_ATTRIB_WPOS:
1364 case FRAG_ATTRIB_COL0:
1365 case FRAG_ATTRIB_COL1:
1366 case FRAG_ATTRIB_FOGC:
1367 case FRAG_ATTRIB_TEX0:
1368 case FRAG_ATTRIB_TEX1:
1369 case FRAG_ATTRIB_TEX2:
1370 case FRAG_ATTRIB_TEX3:
1371 case FRAG_ATTRIB_TEX4:
1372 case FRAG_ATTRIB_TEX5:
1373 case FRAG_ATTRIB_TEX6:
1374 case FRAG_ATTRIB_TEX7:
1375 bValidTexCoord = GL_TRUE;
1376 pAsm->S[0].src.reg =
1377 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1378 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1379 break;
1380 case FRAG_ATTRIB_FACE:
1381 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1382 break;
1383 case FRAG_ATTRIB_PNTC:
1384 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1385 break;
1386 case FRAG_ATTRIB_VAR0:
1387 fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
1388 break;
1389 }
1390
1391 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1392 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1393 {
1394 bValidTexCoord = GL_TRUE;
1395 pAsm->S[0].src.reg =
1396 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1397 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1398 }
1399
1400 break;
1401 }
1402 }
1403
1404 if(GL_TRUE == bValidTexCoord)
1405 {
1406 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1407 }
1408 else
1409 {
1410 radeon_error("Invalid source texcoord for TEX instruction\n");
1411 return GL_FALSE;
1412 }
1413
1414 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1415 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1416 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1417 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1418
1419 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1420 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1421 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1422 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1423
1424 return GL_TRUE;
1425 }
1426
1427 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1428 {
1429 PVSSRC * texture_coordinate_source;
1430 PVSSRC * texture_unit_source;
1431
1432 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1433 if (tex_instruction_ptr == NULL)
1434 {
1435 return GL_FALSE;
1436 }
1437 Init_R700TextureInstruction(tex_instruction_ptr);
1438
1439 texture_coordinate_source = &(pAsm->S[0].src);
1440 texture_unit_source = &(pAsm->S[1].src);
1441
1442 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1443 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1444 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1445
1446 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1447
1448 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1449 if (normalized) {
1450 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1451 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1452 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1453 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1454 } else {
1455 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1456 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1457 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1458 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1459 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1460 }
1461
1462 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1463 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1464 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1465
1466 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1467
1468 // dst
1469 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1470 (pAsm->D.dst.rtype == DST_REG_OUT) )
1471 {
1472 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1473 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1474
1475 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1476 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1477
1478 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1479 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1480 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1481 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1482
1483
1484 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1485 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1486 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1487 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1488 }
1489 else
1490 {
1491 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1492 return GL_FALSE;
1493 }
1494
1495 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1496 {
1497 return GL_FALSE;
1498 }
1499
1500 return GL_TRUE;
1501 }
1502
1503 void initialize(r700_AssemblerBase *pAsm)
1504 {
1505 GLuint cycle, component;
1506
1507 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1508 {
1509 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1510 {
1511 pAsm->hw_gpr[cycle][component] = (-1);
1512 }
1513 }
1514 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1515 {
1516 pAsm->hw_cfile_addr[component] = (-1);
1517 pAsm->hw_cfile_chan[component] = (-1);
1518 }
1519 }
1520
1521 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1522 int source_index,
1523 PVSSRC* pSource,
1524 BITS scalar_channel_index)
1525 {
1526 BITS src_sel;
1527 BITS src_rel;
1528 BITS src_chan;
1529 BITS src_neg;
1530
1531 //--------------------------------------------------------------------------
1532 // Source for operands src0, src1.
1533 // Values [0,127] correspond to GPR[0..127].
1534 // Values [256,511] correspond to cfile constants c[0..255].
1535
1536 //--------------------------------------------------------------------------
1537 // Other special values are shown in the list below.
1538
1539 // 248 SQ_ALU_SRC_0: special constant 0.0.
1540 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1541
1542 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1543 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1544
1545 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1546 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1547
1548 // 254 SQ_ALU_SRC_PV: previous vector result.
1549 // 255 SQ_ALU_SRC_PS: previous scalar result.
1550 //--------------------------------------------------------------------------
1551
1552 BITS channel_swizzle;
1553 switch (scalar_channel_index)
1554 {
1555 case 0: channel_swizzle = pSource->swizzlex; break;
1556 case 1: channel_swizzle = pSource->swizzley; break;
1557 case 2: channel_swizzle = pSource->swizzlez; break;
1558 case 3: channel_swizzle = pSource->swizzlew; break;
1559 default: channel_swizzle = SQ_SEL_MASK; break;
1560 }
1561
1562 if(channel_swizzle == SQ_SEL_0)
1563 {
1564 src_sel = SQ_ALU_SRC_0;
1565 }
1566 else if (channel_swizzle == SQ_SEL_1)
1567 {
1568 src_sel = SQ_ALU_SRC_1;
1569 }
1570 else
1571 {
1572 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1573 (pSource->rtype == SRC_REG_INPUT)
1574 )
1575 {
1576 src_sel = pSource->reg;
1577 }
1578 else if (pSource->rtype == SRC_REG_CONSTANT)
1579 {
1580 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1581 }
1582 else if (pSource->rtype == SRC_REC_LITERAL)
1583 {
1584 src_sel = SQ_ALU_SRC_LITERAL;
1585 }
1586 else
1587 {
1588 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1589 source_index, pSource->rtype);
1590 return GL_FALSE;
1591 }
1592 }
1593
1594 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1595 {
1596 src_rel = SQ_ABSOLUTE;
1597 }
1598 else
1599 {
1600 src_rel = SQ_RELATIVE;
1601 }
1602
1603 switch (channel_swizzle)
1604 {
1605 case SQ_SEL_X:
1606 src_chan = SQ_CHAN_X;
1607 break;
1608 case SQ_SEL_Y:
1609 src_chan = SQ_CHAN_Y;
1610 break;
1611 case SQ_SEL_Z:
1612 src_chan = SQ_CHAN_Z;
1613 break;
1614 case SQ_SEL_W:
1615 src_chan = SQ_CHAN_W;
1616 break;
1617 case SQ_SEL_0:
1618 case SQ_SEL_1:
1619 // Does not matter since src_sel controls
1620 src_chan = SQ_CHAN_X;
1621 break;
1622 default:
1623 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1624 return GL_FALSE;
1625 break;
1626 }
1627
1628 switch (scalar_channel_index)
1629 {
1630 case 0: src_neg = pSource->negx; break;
1631 case 1: src_neg = pSource->negy; break;
1632 case 2: src_neg = pSource->negz; break;
1633 case 3: src_neg = pSource->negw; break;
1634 default: src_neg = 0; break;
1635 }
1636
1637 switch (source_index)
1638 {
1639 case 0:
1640 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1641 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1642 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1643 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1644 break;
1645 case 1:
1646 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1647 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1648 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1649 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1650 break;
1651 case 2:
1652 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1653 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1654 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1655 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1656 break;
1657 default:
1658 radeon_error("Only three sources allowed in ALU opcodes.\n");
1659 return GL_FALSE;
1660 break;
1661 }
1662
1663 return GL_TRUE;
1664 }
1665
1666 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1667 R700ALUInstruction* alu_instruction_ptr,
1668 GLuint contiguous_slots_needed)
1669 {
1670 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1671 {
1672 return GL_FALSE;
1673 }
1674
1675 if ( pAsm->alu_x_opcode != 0 ||
1676 pAsm->cf_current_alu_clause_ptr == NULL ||
1677 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1678 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1679 ) )
1680 {
1681
1682 //new cf inst for this clause
1683 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1684
1685 // link the new cf to cf segment
1686 if(NULL != pAsm->cf_current_alu_clause_ptr)
1687 {
1688 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1689 AddCFInstruction( pAsm->pR700Shader,
1690 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1691 }
1692 else
1693 {
1694 radeon_error("Could not allocate a new ALU CF instruction.\n");
1695 return GL_FALSE;
1696 }
1697
1698 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1699 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1700 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1701
1702 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1703 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1704 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1705
1706 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1707
1708 if(pAsm->alu_x_opcode != 0)
1709 {
1710 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1711 pAsm->alu_x_opcode = 0;
1712 }
1713 else
1714 {
1715 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1716 }
1717
1718 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1719
1720 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1721 }
1722 else
1723 {
1724 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1725 }
1726
1727 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1728 // set the whole_quad_mode for this clause
1729 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1730 {
1731 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1732 }
1733
1734 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1735 {
1736 alu_instruction_ptr->m_Word0.f.last = 1;
1737 }
1738
1739 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1740 {
1741 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1742 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1743 }
1744
1745 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1746
1747 return GL_TRUE;
1748 }
1749
1750 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1751 int source_index,
1752 BITS* psrc_sel,
1753 BITS* psrc_rel,
1754 BITS* psrc_chan,
1755 BITS* psrc_neg)
1756 {
1757 switch (source_index)
1758 {
1759 case 0:
1760 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1761 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1762 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1763 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1764 break;
1765
1766 case 1:
1767 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1768 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1769 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1770 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1771 break;
1772
1773 case 2:
1774 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1775 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1776 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1777 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1778 break;
1779 }
1780 }
1781
1782 int is_cfile(BITS sel)
1783 {
1784 if (sel > 255 && sel < 512)
1785 {
1786 return 1;
1787 }
1788 return 0;
1789 }
1790
1791 int is_const(BITS sel)
1792 {
1793 if (is_cfile(sel))
1794 {
1795 return 1;
1796 }
1797 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1798 {
1799 return 1;
1800 }
1801 return 0;
1802 }
1803
1804 int is_gpr(BITS sel)
1805 {
1806 if (sel >= 0 && sel < 128)
1807 {
1808 return 1;
1809 }
1810 return 0;
1811 }
1812
1813 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1814 SQ_ALU_VEC_120, //001
1815 SQ_ALU_VEC_102, //010
1816
1817 SQ_ALU_VEC_201, //011
1818 SQ_ALU_VEC_012, //100
1819 SQ_ALU_VEC_021, //101
1820
1821 SQ_ALU_VEC_012, //110
1822 SQ_ALU_VEC_012}; //111
1823
1824 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1825 SQ_ALU_SCL_122, //001
1826 SQ_ALU_SCL_122, //010
1827
1828 SQ_ALU_SCL_221, //011
1829 SQ_ALU_SCL_212, //100
1830 SQ_ALU_SCL_122, //101
1831
1832 SQ_ALU_SCL_122, //110
1833 SQ_ALU_SCL_122}; //111
1834
1835 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1836 GLuint sel,
1837 GLuint chan)
1838 {
1839 int res_match = (-1);
1840 int res_empty = (-1);
1841
1842 GLint res;
1843
1844 for (res=3; res>=0; res--)
1845 {
1846 if(pAsm->hw_cfile_addr[ res] < 0)
1847 {
1848 res_empty = res;
1849 }
1850 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1851 &&
1852 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1853 {
1854 res_match = res;
1855 }
1856 }
1857
1858 if(res_match >= 0)
1859 {
1860 // Read for this scalar component already reserved, nothing to do here.
1861 ;
1862 }
1863 else if(res_empty >= 0)
1864 {
1865 pAsm->hw_cfile_addr[ res_empty ] = sel;
1866 pAsm->hw_cfile_chan[ res_empty ] = chan;
1867 }
1868 else
1869 {
1870 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1871 return GL_FALSE;
1872 }
1873 return GL_TRUE;
1874 }
1875
1876 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1877 {
1878 if(pAsm->hw_gpr[cycle][chan] < 0)
1879 {
1880 pAsm->hw_gpr[cycle][chan] = sel;
1881 }
1882 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1883 {
1884 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1885 return GL_FALSE;
1886 }
1887
1888 return GL_TRUE;
1889 }
1890
1891 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1892 {
1893 switch (swiz)
1894 {
1895 case SQ_ALU_SCL_210:
1896 {
1897 int table[3] = {2, 1, 0};
1898 *pCycle = table[sel];
1899 return GL_TRUE;
1900 }
1901 break;
1902 case SQ_ALU_SCL_122:
1903 {
1904 int table[3] = {1, 2, 2};
1905 *pCycle = table[sel];
1906 return GL_TRUE;
1907 }
1908 break;
1909 case SQ_ALU_SCL_212:
1910 {
1911 int table[3] = {2, 1, 2};
1912 *pCycle = table[sel];
1913 return GL_TRUE;
1914 }
1915 break;
1916 case SQ_ALU_SCL_221:
1917 {
1918 int table[3] = {2, 2, 1};
1919 *pCycle = table[sel];
1920 return GL_TRUE;
1921 }
1922 break;
1923 default:
1924 radeon_error("Bad Scalar bank swizzle value\n");
1925 break;
1926 }
1927
1928 return GL_FALSE;
1929 }
1930
1931 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1932 {
1933 switch (swiz)
1934 {
1935 case SQ_ALU_VEC_012:
1936 {
1937 int table[3] = {0, 1, 2};
1938 *pCycle = table[sel];
1939 }
1940 break;
1941 case SQ_ALU_VEC_021:
1942 {
1943 int table[3] = {0, 2, 1};
1944 *pCycle = table[sel];
1945 }
1946 break;
1947 case SQ_ALU_VEC_120:
1948 {
1949 int table[3] = {1, 2, 0};
1950 *pCycle = table[sel];
1951 }
1952 break;
1953 case SQ_ALU_VEC_102:
1954 {
1955 int table[3] = {1, 0, 2};
1956 *pCycle = table[sel];
1957 }
1958 break;
1959 case SQ_ALU_VEC_201:
1960 {
1961 int table[3] = {2, 0, 1};
1962 *pCycle = table[sel];
1963 }
1964 break;
1965 case SQ_ALU_VEC_210:
1966 {
1967 int table[3] = {2, 1, 0};
1968 *pCycle = table[sel];
1969 }
1970 break;
1971 default:
1972 radeon_error("Bad Vec bank swizzle value\n");
1973 return GL_FALSE;
1974 break;
1975 }
1976
1977 return GL_TRUE;
1978 }
1979
1980 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1981 R700ALUInstruction* alu_instruction_ptr)
1982 {
1983 GLuint cycle;
1984 GLuint bank_swizzle;
1985 GLuint const_count = 0;
1986
1987 BITS sel;
1988 BITS chan;
1989 BITS rel;
1990 BITS neg;
1991
1992 GLuint src;
1993
1994 BITS src_sel [3] = {0,0,0};
1995 BITS src_chan[3] = {0,0,0};
1996 BITS src_rel [3] = {0,0,0};
1997 BITS src_neg [3] = {0,0,0};
1998
1999 GLuint swizzle_key;
2000
2001 GLuint number_of_operands = r700GetNumOperands(pAsm);
2002
2003 for (src=0; src<number_of_operands; src++)
2004 {
2005 get_src_properties(alu_instruction_ptr,
2006 src,
2007 &(src_sel[src]),
2008 &(src_rel[src]),
2009 &(src_chan[src]),
2010 &(src_neg[src]) );
2011 }
2012
2013
2014 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2015 (is_const( src_sel[1] ) ? 2 : 0) +
2016 (is_const( src_sel[2] ) ? 1 : 0) );
2017
2018 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2019
2020 for (src=0; src<number_of_operands; src++)
2021 {
2022 sel = src_sel [src];
2023 chan = src_chan[src];
2024 rel = src_rel [src];
2025 neg = src_neg [src];
2026
2027 if (is_const( sel ))
2028 {
2029 // Any constant, including literal and inline constants
2030 const_count++;
2031
2032 if (is_cfile( sel ))
2033 {
2034 reserve_cfile(pAsm, sel, chan);
2035 }
2036
2037 }
2038 }
2039
2040 for (src=0; src<number_of_operands; src++)
2041 {
2042 sel = src_sel [src];
2043 chan = src_chan[src];
2044 rel = src_rel [src];
2045 neg = src_neg [src];
2046
2047 if( is_gpr(sel) )
2048 {
2049 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2050
2051 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2052 {
2053 return GL_FALSE;
2054 }
2055
2056 if(cycle < const_count)
2057 {
2058 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2059 {
2060 return GL_FALSE;
2061 }
2062 }
2063 }
2064 }
2065
2066 return GL_TRUE;
2067 }
2068
2069 GLboolean check_vector(r700_AssemblerBase* pAsm,
2070 R700ALUInstruction* alu_instruction_ptr)
2071 {
2072 GLuint cycle;
2073 GLuint bank_swizzle;
2074 GLuint const_count = 0;
2075
2076 GLuint src;
2077
2078 BITS sel;
2079 BITS chan;
2080 BITS rel;
2081 BITS neg;
2082
2083 BITS src_sel [3] = {0,0,0};
2084 BITS src_chan[3] = {0,0,0};
2085 BITS src_rel [3] = {0,0,0};
2086 BITS src_neg [3] = {0,0,0};
2087
2088 GLuint swizzle_key;
2089
2090 GLuint number_of_operands = r700GetNumOperands(pAsm);
2091
2092 for (src=0; src<number_of_operands; src++)
2093 {
2094 get_src_properties(alu_instruction_ptr,
2095 src,
2096 &(src_sel[src]),
2097 &(src_rel[src]),
2098 &(src_chan[src]),
2099 &(src_neg[src]) );
2100 }
2101
2102
2103 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2104 (is_const( src_sel[1] ) ? 2 : 0) +
2105 (is_const( src_sel[2] ) ? 1 : 0)
2106 );
2107
2108 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2109
2110 for (src=0; src<number_of_operands; src++)
2111 {
2112 sel = src_sel [src];
2113 chan = src_chan[src];
2114 rel = src_rel [src];
2115 neg = src_neg [src];
2116
2117
2118 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2119
2120 if( is_gpr(sel) )
2121 {
2122 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2123 {
2124 return GL_FALSE;
2125 }
2126
2127 if ( (src == 1) &&
2128 (sel == src_sel[0]) &&
2129 (chan == src_chan[0]) )
2130 {
2131 }
2132 else
2133 {
2134 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2135 {
2136 return GL_FALSE;
2137 }
2138 }
2139 }
2140 else if( is_const(sel) )
2141 {
2142 const_count++;
2143
2144 if( is_cfile(sel) )
2145 {
2146 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2147 {
2148 return GL_FALSE;
2149 }
2150 }
2151 }
2152 }
2153
2154 return GL_TRUE;
2155 }
2156
2157 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2158 {
2159 GLuint number_of_scalar_operations;
2160 GLboolean is_single_scalar_operation;
2161 GLuint scalar_channel_index;
2162
2163 PVSSRC * pcurrent_source;
2164 int current_source_index;
2165 GLuint contiguous_slots_needed;
2166
2167 GLuint uNumSrc = r700GetNumOperands(pAsm);
2168 //GLuint channel_swizzle, j;
2169 //GLuint chan_counter[4] = {0, 0, 0, 0};
2170 //PVSSRC * pSource[3];
2171 GLboolean bSplitInst = GL_FALSE;
2172
2173 if (1 == pAsm->D.dst.math)
2174 {
2175 is_single_scalar_operation = GL_TRUE;
2176 number_of_scalar_operations = 1;
2177 }
2178 else
2179 {
2180 is_single_scalar_operation = GL_FALSE;
2181 number_of_scalar_operations = 4;
2182
2183 /* current assembler doesn't do more than 1 register per source */
2184 #if 0
2185 /* check read port, only very preliminary algorithm, not count in
2186 src0/1 same comp case and prev slot repeat case; also not count relative
2187 addressing. TODO: improve performance. */
2188 for(j=0; j<uNumSrc; j++)
2189 {
2190 pSource[j] = &(pAsm->S[j].src);
2191 }
2192 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2193 {
2194 for(j=0; j<uNumSrc; j++)
2195 {
2196 switch (scalar_channel_index)
2197 {
2198 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2199 case 1: channel_swizzle = pSource[j]->swizzley; break;
2200 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2201 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2202 default: channel_swizzle = SQ_SEL_MASK; break;
2203 }
2204 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2205 (pSource[j]->rtype == SRC_REG_INPUT))
2206 && (channel_swizzle <= SQ_SEL_W) )
2207 {
2208 chan_counter[channel_swizzle]++;
2209 }
2210 }
2211 }
2212 if( (chan_counter[SQ_SEL_X] > 3)
2213 || (chan_counter[SQ_SEL_Y] > 3)
2214 || (chan_counter[SQ_SEL_Z] > 3)
2215 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2216 {
2217 bSplitInst = GL_TRUE;
2218 }
2219 #endif
2220 }
2221
2222 contiguous_slots_needed = 0;
2223
2224 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2225 {
2226 contiguous_slots_needed = 4;
2227 }
2228
2229 initialize(pAsm);
2230
2231 for (scalar_channel_index=0;
2232 scalar_channel_index < number_of_scalar_operations;
2233 scalar_channel_index++)
2234 {
2235 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2236 if (alu_instruction_ptr == NULL)
2237 {
2238 return GL_FALSE;
2239 }
2240 Init_R700ALUInstruction(alu_instruction_ptr);
2241
2242 //src 0
2243 current_source_index = 0;
2244 pcurrent_source = &(pAsm->S[0].src);
2245
2246 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2247 current_source_index,
2248 pcurrent_source,
2249 scalar_channel_index) )
2250 {
2251 return GL_FALSE;
2252 }
2253
2254 if (uNumSrc > 1)
2255 {
2256 // Process source 1
2257 current_source_index = 1;
2258 pcurrent_source = &(pAsm->S[current_source_index].src);
2259
2260 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2261 current_source_index,
2262 pcurrent_source,
2263 scalar_channel_index) )
2264 {
2265 return GL_FALSE;
2266 }
2267 }
2268
2269 //other bits
2270 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
2271
2272 if( (is_single_scalar_operation == GL_TRUE)
2273 || (GL_TRUE == bSplitInst) )
2274 {
2275 alu_instruction_ptr->m_Word0.f.last = 1;
2276 }
2277 else
2278 {
2279 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2280 }
2281
2282 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2283 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2284 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2285
2286 // dst
2287 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2288 (pAsm->D.dst.rtype == DST_REG_OUT) )
2289 {
2290 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2291 }
2292 else
2293 {
2294 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2295 return GL_FALSE;
2296 }
2297
2298 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2299
2300 if ( is_single_scalar_operation == GL_TRUE )
2301 {
2302 // Override scalar_channel_index since only one scalar value will be written
2303 if(pAsm->D.dst.writex)
2304 {
2305 scalar_channel_index = 0;
2306 }
2307 else if(pAsm->D.dst.writey)
2308 {
2309 scalar_channel_index = 1;
2310 }
2311 else if(pAsm->D.dst.writez)
2312 {
2313 scalar_channel_index = 2;
2314 }
2315 else if(pAsm->D.dst.writew)
2316 {
2317 scalar_channel_index = 3;
2318 }
2319 }
2320
2321 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2322
2323 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2324
2325 if (pAsm->D.dst.op3)
2326 {
2327 //op3
2328
2329 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2330
2331 //There's 3rd src for op3
2332 current_source_index = 2;
2333 pcurrent_source = &(pAsm->S[current_source_index].src);
2334
2335 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2336 current_source_index,
2337 pcurrent_source,
2338 scalar_channel_index) )
2339 {
2340 return GL_FALSE;
2341 }
2342 }
2343 else
2344 {
2345 //op2
2346 if (pAsm->bR6xx)
2347 {
2348 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2349
2350 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2351 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2352
2353 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2354 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2355 switch (scalar_channel_index)
2356 {
2357 case 0:
2358 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2359 break;
2360 case 1:
2361 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2362 break;
2363 case 2:
2364 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2365 break;
2366 case 3:
2367 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2368 break;
2369 default:
2370 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2371 break;
2372 }
2373 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2374 }
2375 else
2376 {
2377 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2378
2379 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2380 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2381
2382 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2383 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2384 switch (scalar_channel_index)
2385 {
2386 case 0:
2387 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2388 break;
2389 case 1:
2390 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2391 break;
2392 case 2:
2393 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2394 break;
2395 case 3:
2396 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2397 break;
2398 default:
2399 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2400 break;
2401 }
2402 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2403 }
2404 }
2405
2406 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2407 {
2408 return GL_FALSE;
2409 }
2410
2411 /*
2412 * Judge the type of current instruction, is it vector or scalar
2413 * instruction.
2414 */
2415 if (is_single_scalar_operation)
2416 {
2417 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2418 {
2419 return GL_FALSE;
2420 }
2421 }
2422 else
2423 {
2424 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2425 {
2426 return 1;
2427 }
2428 }
2429
2430 contiguous_slots_needed = 0;
2431 }
2432
2433 return GL_TRUE;
2434 }
2435
2436 GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
2437 {
2438 GLuint number_of_scalar_operations;
2439 GLboolean is_single_scalar_operation;
2440 GLuint scalar_channel_index;
2441
2442 PVSSRC * pcurrent_source;
2443 int current_source_index;
2444 GLuint contiguous_slots_needed;
2445
2446 GLuint uNumSrc = r700GetNumOperands(pAsm);
2447
2448 GLboolean bSplitInst = GL_FALSE;
2449
2450 if (1 == pAsm->D.dst.math)
2451 {
2452 is_single_scalar_operation = GL_TRUE;
2453 number_of_scalar_operations = 1;
2454 }
2455 else
2456 {
2457 is_single_scalar_operation = GL_FALSE;
2458 number_of_scalar_operations = 4;
2459 }
2460
2461 contiguous_slots_needed = 0;
2462
2463 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2464 {
2465 contiguous_slots_needed = 4;
2466 }
2467
2468 initialize(pAsm);
2469
2470 for (scalar_channel_index=0;
2471 scalar_channel_index < number_of_scalar_operations;
2472 scalar_channel_index++)
2473 {
2474 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2475 if (alu_instruction_ptr == NULL)
2476 {
2477 return GL_FALSE;
2478 }
2479 Init_R700ALUInstruction(alu_instruction_ptr);
2480
2481 //src 0
2482 current_source_index = 0;
2483 pcurrent_source = &(pAsm->S[0].src);
2484
2485 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2486 current_source_index,
2487 pcurrent_source,
2488 scalar_channel_index) )
2489 {
2490 return GL_FALSE;
2491 }
2492
2493 if (uNumSrc > 1)
2494 {
2495 // Process source 1
2496 current_source_index = 1;
2497 pcurrent_source = &(pAsm->S[current_source_index].src);
2498
2499 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2500 current_source_index,
2501 pcurrent_source,
2502 scalar_channel_index) )
2503 {
2504 return GL_FALSE;
2505 }
2506 }
2507
2508 //other bits
2509 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2510
2511 if( (is_single_scalar_operation == GL_TRUE)
2512 || (GL_TRUE == bSplitInst) )
2513 {
2514 alu_instruction_ptr->m_Word0.f.last = 1;
2515 }
2516 else
2517 {
2518 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2519 }
2520
2521 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2522 if(1 == pAsm->D.dst.predicated)
2523 {
2524 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2525 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2526 }
2527 else
2528 {
2529 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2530 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2531 }
2532
2533 // dst
2534 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2535 (pAsm->D.dst.rtype == DST_REG_OUT) )
2536 {
2537 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2538 }
2539 else
2540 {
2541 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2542 return GL_FALSE;
2543 }
2544
2545 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2546
2547 if ( is_single_scalar_operation == GL_TRUE )
2548 {
2549 // Override scalar_channel_index since only one scalar value will be written
2550 if(pAsm->D.dst.writex)
2551 {
2552 scalar_channel_index = 0;
2553 }
2554 else if(pAsm->D.dst.writey)
2555 {
2556 scalar_channel_index = 1;
2557 }
2558 else if(pAsm->D.dst.writez)
2559 {
2560 scalar_channel_index = 2;
2561 }
2562 else if(pAsm->D.dst.writew)
2563 {
2564 scalar_channel_index = 3;
2565 }
2566 }
2567
2568 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2569
2570 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2571
2572 if (pAsm->D.dst.op3)
2573 {
2574 //op3
2575
2576 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2577
2578 //There's 3rd src for op3
2579 current_source_index = 2;
2580 pcurrent_source = &(pAsm->S[current_source_index].src);
2581
2582 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2583 current_source_index,
2584 pcurrent_source,
2585 scalar_channel_index) )
2586 {
2587 return GL_FALSE;
2588 }
2589 }
2590 else
2591 {
2592 //op2
2593 if (pAsm->bR6xx)
2594 {
2595 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2596
2597 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2598 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2599
2600 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2601 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2602 switch (scalar_channel_index)
2603 {
2604 case 0:
2605 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2606 break;
2607 case 1:
2608 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2609 break;
2610 case 2:
2611 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2612 break;
2613 case 3:
2614 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2615 break;
2616 default:
2617 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2618 break;
2619 }
2620 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2621 }
2622 else
2623 {
2624 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2625
2626 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2627 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2628
2629 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2630 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2631 switch (scalar_channel_index)
2632 {
2633 case 0:
2634 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2635 break;
2636 case 1:
2637 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2638 break;
2639 case 2:
2640 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2641 break;
2642 case 3:
2643 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2644 break;
2645 default:
2646 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2647 break;
2648 }
2649 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2650 }
2651 }
2652
2653 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2654 {
2655 return GL_FALSE;
2656 }
2657
2658 /*
2659 * Judge the type of current instruction, is it vector or scalar
2660 * instruction.
2661 */
2662 if (is_single_scalar_operation)
2663 {
2664 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2665 {
2666 return GL_FALSE;
2667 }
2668 }
2669 else
2670 {
2671 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2672 {
2673 return 1;
2674 }
2675 }
2676
2677 contiguous_slots_needed = 0;
2678 }
2679
2680 return GL_TRUE;
2681 }
2682
2683 GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
2684 {
2685 R700ALUInstruction * alu_instruction_ptr;
2686 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2687 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2688
2689 GLuint number_of_scalar_operations;
2690 GLboolean is_single_scalar_operation;
2691 GLuint scalar_channel_index;
2692
2693 GLuint contiguous_slots_needed;
2694 GLuint lastInstruction;
2695 GLuint not_masked[4];
2696
2697 GLuint uNumSrc = r700GetNumOperands(pAsm);
2698
2699 GLboolean bSplitInst = GL_FALSE;
2700
2701 number_of_scalar_operations = 0;
2702 contiguous_slots_needed = 0;
2703
2704 if(1 == pAsm->D.dst.writew)
2705 {
2706 lastInstruction = 3;
2707 number_of_scalar_operations++;
2708 not_masked[3] = 1;
2709 }
2710 else
2711 {
2712 not_masked[3] = 0;
2713 }
2714 if(1 == pAsm->D.dst.writez)
2715 {
2716 lastInstruction = 2;
2717 number_of_scalar_operations++;
2718 not_masked[2] = 1;
2719 }
2720 else
2721 {
2722 not_masked[2] = 0;
2723 }
2724 if(1 == pAsm->D.dst.writey)
2725 {
2726 lastInstruction = 1;
2727 number_of_scalar_operations++;
2728 not_masked[1] = 1;
2729 }
2730 else
2731 {
2732 not_masked[1] = 0;
2733 }
2734 if(1 == pAsm->D.dst.writex)
2735 {
2736 lastInstruction = 0;
2737 number_of_scalar_operations++;
2738 not_masked[0] = 1;
2739 }
2740 else
2741 {
2742 not_masked[0] = 0;
2743 }
2744
2745 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2746 {
2747 contiguous_slots_needed = 4;
2748 }
2749 else
2750 {
2751 contiguous_slots_needed = number_of_scalar_operations;
2752 }
2753
2754 if(1 == pAsm->D2.dst2.literal)
2755 {
2756 contiguous_slots_needed += 1;
2757 }
2758 else if(2 == pAsm->D2.dst2.literal)
2759 {
2760 contiguous_slots_needed += 2;
2761 }
2762
2763 initialize(pAsm);
2764
2765 for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++)
2766 {
2767 if(0 == not_masked[scalar_channel_index])
2768 {
2769 continue;
2770 }
2771
2772 if(scalar_channel_index == lastInstruction)
2773 {
2774 switch (pAsm->D2.dst2.literal)
2775 {
2776 case 0:
2777 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2778 if (alu_instruction_ptr == NULL)
2779 {
2780 return GL_FALSE;
2781 }
2782 Init_R700ALUInstruction(alu_instruction_ptr);
2783 break;
2784 case 1:
2785 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2786 if (alu_instruction_ptr_hl == NULL)
2787 {
2788 return GL_FALSE;
2789 }
2790 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]);
2791 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2792 break;
2793 case 2:
2794 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2795 if (alu_instruction_ptr_fl == NULL)
2796 {
2797 return GL_FALSE;
2798 }
2799 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]);
2800 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2801 break;
2802 default:
2803 break;
2804 };
2805 }
2806 else
2807 {
2808 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2809 if (alu_instruction_ptr == NULL)
2810 {
2811 return GL_FALSE;
2812 }
2813 Init_R700ALUInstruction(alu_instruction_ptr);
2814 }
2815
2816 //src 0
2817 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2818 0,
2819 &(pAsm->S[0].src),
2820 scalar_channel_index) )
2821 {
2822 return GL_FALSE;
2823 }
2824
2825 if (uNumSrc > 1)
2826 {
2827 // Process source 1
2828 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2829 1,
2830 &(pAsm->S[1].src),
2831 scalar_channel_index) )
2832 {
2833 return GL_FALSE;
2834 }
2835 }
2836
2837 //other bits
2838 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2839
2840 if(scalar_channel_index == lastInstruction)
2841 {
2842 alu_instruction_ptr->m_Word0.f.last = 1;
2843 }
2844
2845 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2846 if(1 == pAsm->D.dst.predicated)
2847 {
2848 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2849 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2850 }
2851 else
2852 {
2853 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0;
2854 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0;
2855 }
2856
2857 // dst
2858 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2859 (pAsm->D.dst.rtype == DST_REG_OUT) )
2860 {
2861 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2862 }
2863 else
2864 {
2865 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2866 return GL_FALSE;
2867 }
2868
2869 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2870
2871 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2872
2873 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2874
2875 if (pAsm->D.dst.op3)
2876 {
2877 //op3
2878 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2879
2880 //There's 3rd src for op3
2881 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2882 2,
2883 &(pAsm->S[2].src),
2884 scalar_channel_index) )
2885 {
2886 return GL_FALSE;
2887 }
2888 }
2889 else
2890 {
2891 //op2
2892 if (pAsm->bR6xx)
2893 {
2894 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2895 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2896 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2897 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1;
2898 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2899 }
2900 else
2901 {
2902 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2903 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2904 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2905 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1;
2906 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2907 }
2908 }
2909
2910 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2911 {
2912 return GL_FALSE;
2913 }
2914
2915 if (1 == number_of_scalar_operations)
2916 {
2917 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2918 {
2919 return GL_FALSE;
2920 }
2921 }
2922 else
2923 {
2924 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2925 {
2926 return GL_FALSE;
2927 }
2928 }
2929
2930 contiguous_slots_needed -= 2;
2931 }
2932
2933 return GL_TRUE;
2934 }
2935
2936 GLboolean next_ins(r700_AssemblerBase *pAsm)
2937 {
2938 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2939
2940 if( GL_TRUE == pAsm->is_tex )
2941 {
2942 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2943 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2944 {
2945 radeon_error("Error assembling TEX instruction\n");
2946 return GL_FALSE;
2947 }
2948 } else {
2949 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2950 {
2951 radeon_error("Error assembling TEX instruction\n");
2952 return GL_FALSE;
2953 }
2954 }
2955 }
2956 else
2957 { //ALU
2958 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2959 {
2960 radeon_error("Error assembling ALU instruction\n");
2961 return GL_FALSE;
2962 }
2963 }
2964
2965 if(pAsm->D.dst.rtype == DST_REG_OUT)
2966 {
2967 if(pAsm->D.dst.op3)
2968 {
2969 // There is no mask for OP3 instructions, so all channels are written
2970 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2971 }
2972 else
2973 {
2974 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2975 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2976 }
2977 }
2978
2979 //reset for next inst.
2980 pAsm->D.bits = 0;
2981 pAsm->D2.bits = 0;
2982 pAsm->S[0].bits = 0;
2983 pAsm->S[1].bits = 0;
2984 pAsm->S[2].bits = 0;
2985 pAsm->is_tex = GL_FALSE;
2986 pAsm->need_tex_barrier = GL_FALSE;
2987
2988 return GL_TRUE;
2989 }
2990
2991 GLboolean next_ins2(r700_AssemblerBase *pAsm)
2992 {
2993 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2994
2995 //ALU
2996 if( GL_FALSE == assemble_alu_instruction2(pAsm) )
2997 {
2998 radeon_error("Error assembling ALU instruction\n");
2999 return GL_FALSE;
3000 }
3001
3002 if(pAsm->D.dst.rtype == DST_REG_OUT)
3003 {
3004 if(pAsm->D.dst.op3)
3005 {
3006 // There is no mask for OP3 instructions, so all channels are written
3007 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
3008 }
3009 else
3010 {
3011 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
3012 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
3013 }
3014 }
3015
3016 //reset for next inst.
3017 pAsm->D.bits = 0;
3018 pAsm->D2.bits = 0;
3019 pAsm->S[0].bits = 0;
3020 pAsm->S[1].bits = 0;
3021 pAsm->S[2].bits = 0;
3022 pAsm->is_tex = GL_FALSE;
3023 pAsm->need_tex_barrier = GL_FALSE;
3024
3025 //richard nov.16 glsl
3026 pAsm->D2.bits = 0;
3027
3028 return GL_TRUE;
3029 }
3030
3031 /* not work yet */
3032 GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
3033 {
3034 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3035
3036 //ALU
3037 if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) )
3038 {
3039 radeon_error("Error assembling ALU instruction\n");
3040 return GL_FALSE;
3041 }
3042
3043 //reset for next inst.
3044 pAsm->D.bits = 0;
3045 pAsm->D2.bits = 0;
3046 pAsm->S[0].bits = 0;
3047 pAsm->S[1].bits = 0;
3048 pAsm->S[2].bits = 0;
3049 pAsm->is_tex = GL_FALSE;
3050 pAsm->need_tex_barrier = GL_FALSE;
3051 return GL_TRUE;
3052 }
3053
3054 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3055 {
3056 BITS tmp;
3057
3058 checkop1(pAsm);
3059
3060 tmp = gethelpr(pAsm);
3061
3062 // opcode tmp.x, a.x
3063 // MOV dst, tmp.x
3064
3065 pAsm->D.dst.opcode = opcode;
3066 pAsm->D.dst.math = 1;
3067
3068 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3069 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3070 pAsm->D.dst.reg = tmp;
3071 pAsm->D.dst.writex = 1;
3072
3073 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3074 {
3075 return GL_FALSE;
3076 }
3077
3078 if ( GL_FALSE == next_ins(pAsm) )
3079 {
3080 return GL_FALSE;
3081 }
3082
3083 // Now replicate result to all necessary channels in destination
3084 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3085
3086 if( GL_FALSE == assemble_dst(pAsm) )
3087 {
3088 return GL_FALSE;
3089 }
3090
3091 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3092 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3093 pAsm->S[0].src.reg = tmp;
3094
3095 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3096 noneg_PVSSRC(&(pAsm->S[0].src));
3097
3098 if( GL_FALSE == next_ins(pAsm) )
3099 {
3100 return GL_FALSE;
3101 }
3102
3103 return GL_TRUE;
3104 }
3105
3106 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3107 {
3108 checkop1(pAsm);
3109
3110 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3111
3112 if( GL_FALSE == assemble_dst(pAsm) )
3113 {
3114 return GL_FALSE;
3115 }
3116 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3117 {
3118 return GL_FALSE;
3119 }
3120
3121 pAsm->S[1].bits = pAsm->S[0].bits;
3122 flipneg_PVSSRC(&(pAsm->S[1].src));
3123
3124 if ( GL_FALSE == next_ins(pAsm) )
3125 {
3126 return GL_FALSE;
3127 }
3128
3129 return GL_TRUE;
3130 }
3131
3132 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3133 {
3134 if( GL_FALSE == checkop2(pAsm) )
3135 {
3136 return GL_FALSE;
3137 }
3138
3139 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3140
3141 if( GL_FALSE == assemble_dst(pAsm) )
3142 {
3143 return GL_FALSE;
3144 }
3145
3146 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3147 {
3148 return GL_FALSE;
3149 }
3150
3151 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3152 {
3153 return GL_FALSE;
3154 }
3155
3156 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3157 {
3158 flipneg_PVSSRC(&(pAsm->S[1].src));
3159 }
3160
3161 if( GL_FALSE == next_ins(pAsm) )
3162 {
3163 return GL_FALSE;
3164 }
3165
3166 return GL_TRUE;
3167 }
3168
3169 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3170 { /* TODO: ar values dont' persist between clauses */
3171 if( GL_FALSE == checkop1(pAsm) )
3172 {
3173 return GL_FALSE;
3174 }
3175
3176 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3177 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3178 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3179 pAsm->D.dst.reg = 0;
3180 pAsm->D.dst.writex = 0;
3181 pAsm->D.dst.writey = 0;
3182 pAsm->D.dst.writez = 0;
3183 pAsm->D.dst.writew = 0;
3184
3185 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3186 {
3187 return GL_FALSE;
3188 }
3189
3190 if( GL_FALSE == next_ins(pAsm) )
3191 {
3192 return GL_FALSE;
3193 }
3194
3195 return GL_TRUE;
3196 }
3197
3198 GLboolean assemble_BAD(char *opcode_str)
3199 {
3200 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
3201 return GL_FALSE;
3202 }
3203
3204 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3205 {
3206 int tmp;
3207
3208 if( GL_FALSE == checkop3(pAsm) )
3209 {
3210 return GL_FALSE;
3211 }
3212
3213 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3214 pAsm->D.dst.op3 = 1;
3215
3216 tmp = (-1);
3217
3218 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3219 {
3220 //OP3 has no support for write mask
3221 tmp = gethelpr(pAsm);
3222
3223 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3224 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3225 pAsm->D.dst.reg = tmp;
3226
3227 nomask_PVSDST(&(pAsm->D.dst));
3228 }
3229 else
3230 {
3231 if( GL_FALSE == assemble_dst(pAsm) )
3232 {
3233 return GL_FALSE;
3234 }
3235 }
3236
3237 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3238 {
3239 return GL_FALSE;
3240 }
3241
3242 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
3243 {
3244 return GL_FALSE;
3245 }
3246
3247 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
3248 {
3249 return GL_FALSE;
3250 }
3251
3252 if ( GL_FALSE == next_ins(pAsm) )
3253 {
3254 return GL_FALSE;
3255 }
3256
3257 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3258 {
3259 if( GL_FALSE == assemble_dst(pAsm) )
3260 {
3261 return GL_FALSE;
3262 }
3263
3264 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3265
3266 //tmp for source
3267 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3268 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3269 pAsm->S[0].src.reg = tmp;
3270
3271 noneg_PVSSRC(&(pAsm->S[0].src));
3272 noswizzle_PVSSRC(&(pAsm->S[0].src));
3273
3274 if( GL_FALSE == next_ins(pAsm) )
3275 {
3276 return GL_FALSE;
3277 }
3278 }
3279
3280 return GL_TRUE;
3281 }
3282
3283 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
3284 {
3285 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
3286 }
3287
3288 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3289 {
3290 if( GL_FALSE == checkop2(pAsm) )
3291 {
3292 return GL_FALSE;
3293 }
3294
3295 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
3296
3297 if( GL_FALSE == assemble_dst(pAsm) )
3298 {
3299 return GL_FALSE;
3300 }
3301
3302 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3303 {
3304 return GL_FALSE;
3305 }
3306
3307 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3308 {
3309 return GL_FALSE;
3310 }
3311
3312 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3313 {
3314 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3315 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3316 }
3317 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3318 {
3319 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3320 }
3321
3322 if ( GL_FALSE == next_ins(pAsm) )
3323 {
3324 return GL_FALSE;
3325 }
3326
3327 return GL_TRUE;
3328 }
3329
3330 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3331 {
3332 if( GL_FALSE == checkop2(pAsm) )
3333 {
3334 return GL_FALSE;
3335 }
3336
3337 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3338
3339 if( GL_FALSE == assemble_dst(pAsm) )
3340 {
3341 return GL_FALSE;
3342 }
3343
3344 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3345 {
3346 return GL_FALSE;
3347 }
3348
3349 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3350 {
3351 return GL_FALSE;
3352 }
3353
3354 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3355 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3356
3357 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3358 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3359
3360 if ( GL_FALSE == next_ins(pAsm) )
3361 {
3362 return GL_FALSE;
3363 }
3364
3365 return GL_TRUE;
3366 }
3367
3368 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3369 {
3370 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3371 }
3372
3373 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3374 {
3375 BITS tmp;
3376
3377 checkop1(pAsm);
3378
3379 tmp = gethelpr(pAsm);
3380
3381 // FLOOR tmp.x, a.x
3382 // EX2 dst.x tmp.x
3383
3384 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3385 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3386
3387 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3388 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3389 pAsm->D.dst.reg = tmp;
3390 pAsm->D.dst.writex = 1;
3391
3392 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3393 {
3394 return GL_FALSE;
3395 }
3396
3397 if( GL_FALSE == next_ins(pAsm) )
3398 {
3399 return GL_FALSE;
3400 }
3401
3402 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3403 pAsm->D.dst.math = 1;
3404
3405 if( GL_FALSE == assemble_dst(pAsm) )
3406 {
3407 return GL_FALSE;
3408 }
3409
3410 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3411
3412 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3413 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3414 pAsm->S[0].src.reg = tmp;
3415
3416 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3417 noneg_PVSSRC(&(pAsm->S[0].src));
3418
3419 if( GL_FALSE == next_ins(pAsm) )
3420 {
3421 return GL_FALSE;
3422 }
3423 }
3424
3425 // FRACT dst.y a.x
3426
3427 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3428 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3429
3430 if( GL_FALSE == assemble_dst(pAsm) )
3431 {
3432 return GL_FALSE;
3433 }
3434
3435 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3436 {
3437 return GL_FALSE;
3438 }
3439
3440 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3441
3442 if( GL_FALSE == next_ins(pAsm) )
3443 {
3444 return GL_FALSE;
3445 }
3446 }
3447
3448 // EX2 dst.z, a.x
3449
3450 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3451 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3452 pAsm->D.dst.math = 1;
3453
3454 if( GL_FALSE == assemble_dst(pAsm) )
3455 {
3456 return GL_FALSE;
3457 }
3458
3459 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3460 {
3461 return GL_FALSE;
3462 }
3463
3464 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3465
3466 if( GL_FALSE == next_ins(pAsm) )
3467 {
3468 return GL_FALSE;
3469 }
3470 }
3471
3472 // MOV dst.w 1.0
3473
3474 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3475 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3476
3477 if( GL_FALSE == assemble_dst(pAsm) )
3478 {
3479 return GL_FALSE;
3480 }
3481
3482 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3483
3484 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3485 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3486 pAsm->S[0].src.reg = tmp;
3487
3488 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3489 noneg_PVSSRC(&(pAsm->S[0].src));
3490
3491 if( GL_FALSE == next_ins(pAsm) )
3492 {
3493 return GL_FALSE;
3494 }
3495 }
3496
3497 return GL_TRUE;
3498 }
3499
3500 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3501 {
3502 checkop1(pAsm);
3503
3504 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3505
3506 if ( GL_FALSE == assemble_dst(pAsm) )
3507 {
3508 return GL_FALSE;
3509 }
3510
3511 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3512 {
3513 return GL_FALSE;
3514 }
3515
3516 if ( GL_FALSE == next_ins(pAsm) )
3517 {
3518 return GL_FALSE;
3519 }
3520
3521 return GL_TRUE;
3522 }
3523
3524 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3525 {
3526 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3527 }
3528
3529 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3530 {
3531 checkop1(pAsm);
3532
3533 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3534
3535 if ( GL_FALSE == assemble_dst(pAsm) )
3536 {
3537 return GL_FALSE;
3538 }
3539
3540 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3541 {
3542 return GL_FALSE;
3543 }
3544
3545 if ( GL_FALSE == next_ins(pAsm) )
3546 {
3547 return GL_FALSE;
3548 }
3549
3550 return GL_TRUE;
3551 }
3552
3553 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
3554 {
3555 /* TODO: doc says KILL has to be last(end) ALU clause */
3556
3557 checkop1(pAsm);
3558
3559 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
3560
3561 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3562 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3563 pAsm->D.dst.reg = 0;
3564 pAsm->D.dst.writex = 0;
3565 pAsm->D.dst.writey = 0;
3566 pAsm->D.dst.writez = 0;
3567 pAsm->D.dst.writew = 0;
3568
3569 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3570 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3571 pAsm->S[0].src.reg = 0;
3572
3573 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
3574 noneg_PVSSRC(&(pAsm->S[0].src));
3575
3576 if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
3577 {
3578 return GL_FALSE;
3579 }
3580
3581 if ( GL_FALSE == next_ins(pAsm) )
3582 {
3583 return GL_FALSE;
3584 }
3585
3586 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3587
3588 return GL_TRUE;
3589 }
3590
3591 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3592 {
3593 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3594 }
3595
3596 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3597 {
3598 BITS tmp;
3599
3600 if( GL_FALSE == checkop3(pAsm) )
3601 {
3602 return GL_FALSE;
3603 }
3604
3605 tmp = gethelpr(pAsm);
3606
3607 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3608
3609 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3610 pAsm->D.dst.reg = tmp;
3611 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3612 nomask_PVSDST(&(pAsm->D.dst));
3613
3614
3615 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3616 {
3617 return GL_FALSE;
3618 }
3619
3620 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3621 {
3622 return GL_FALSE;
3623 }
3624
3625 neg_PVSSRC(&(pAsm->S[1].src));
3626
3627 if( GL_FALSE == next_ins(pAsm) )
3628 {
3629 return GL_FALSE;
3630 }
3631
3632 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3633 pAsm->D.dst.op3 = 1;
3634
3635 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3636 pAsm->D.dst.reg = tmp;
3637 nomask_PVSDST(&(pAsm->D.dst));
3638 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3639
3640 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3641 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3642 pAsm->S[0].src.reg = tmp;
3643 noswizzle_PVSSRC(&(pAsm->S[0].src));
3644
3645
3646 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3647 {
3648 return GL_FALSE;
3649 }
3650 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3651 {
3652 return GL_FALSE;
3653 }
3654
3655 if( GL_FALSE == next_ins(pAsm) )
3656 {
3657 return GL_FALSE;
3658 }
3659
3660 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3661
3662 if( GL_FALSE == assemble_dst(pAsm) )
3663 {
3664 return GL_FALSE;
3665 }
3666
3667 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3668 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3669 pAsm->S[0].src.reg = tmp;
3670 noswizzle_PVSSRC(&(pAsm->S[0].src));
3671
3672 if( GL_FALSE == next_ins(pAsm) )
3673 {
3674 return GL_FALSE;
3675 }
3676
3677 return GL_TRUE;
3678 }
3679
3680 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3681 {
3682 BITS tmp1, tmp2, tmp3;
3683
3684 checkop1(pAsm);
3685
3686 tmp1 = gethelpr(pAsm);
3687 tmp2 = gethelpr(pAsm);
3688 tmp3 = gethelpr(pAsm);
3689
3690 // FIXME: The hardware can do fabs() directly on input
3691 // elements, but the compiler doesn't have the
3692 // capability to use that.
3693
3694 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3695
3696 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3697
3698 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3699 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3700 pAsm->D.dst.reg = tmp1;
3701 pAsm->D.dst.writex = 1;
3702
3703 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3704 {
3705 return GL_FALSE;
3706 }
3707
3708 pAsm->S[1].bits = pAsm->S[0].bits;
3709 flipneg_PVSSRC(&(pAsm->S[1].src));
3710
3711 if ( GL_FALSE == next_ins(pAsm) )
3712 {
3713 return GL_FALSE;
3714 }
3715
3716 // Entire algo:
3717 //
3718 // LG2 tmp2.x, tmp1.x
3719 // FLOOR tmp3.x, tmp2.x
3720 // MOV dst.x, tmp3.x
3721 // ADD tmp3.x, tmp2.x, -tmp3.x
3722 // EX2 dst.y, tmp3.x
3723 // MOV dst.z, tmp2.x
3724 // MOV dst.w, 1.0
3725
3726 // LG2 tmp2.x, tmp1.x
3727 // FLOOR tmp3.x, tmp2.x
3728
3729 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3730 pAsm->D.dst.math = 1;
3731
3732 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3733 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3734 pAsm->D.dst.reg = tmp2;
3735 pAsm->D.dst.writex = 1;
3736
3737 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3738 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3739 pAsm->S[0].src.reg = tmp1;
3740
3741 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3742 noneg_PVSSRC(&(pAsm->S[0].src));
3743
3744 if( GL_FALSE == next_ins(pAsm) )
3745 {
3746 return GL_FALSE;
3747 }
3748
3749 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3750
3751 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3752 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3753 pAsm->D.dst.reg = tmp3;
3754 pAsm->D.dst.writex = 1;
3755
3756 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3757 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3758 pAsm->S[0].src.reg = tmp2;
3759
3760 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3761 noneg_PVSSRC(&(pAsm->S[0].src));
3762
3763 if( GL_FALSE == next_ins(pAsm) )
3764 {
3765 return GL_FALSE;
3766 }
3767
3768 // MOV dst.x, tmp3.x
3769
3770 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3771
3772 if( GL_FALSE == assemble_dst(pAsm) )
3773 {
3774 return GL_FALSE;
3775 }
3776
3777 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3778
3779 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3780 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3781 pAsm->S[0].src.reg = tmp3;
3782
3783 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3784 noneg_PVSSRC(&(pAsm->S[0].src));
3785
3786 if( GL_FALSE == next_ins(pAsm) )
3787 {
3788 return GL_FALSE;
3789 }
3790
3791 // ADD tmp3.x, tmp2.x, -tmp3.x
3792 // EX2 dst.y, tmp3.x
3793
3794 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3795
3796 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3797 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3798 pAsm->D.dst.reg = tmp3;
3799 pAsm->D.dst.writex = 1;
3800
3801 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3802 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3803 pAsm->S[0].src.reg = tmp2;
3804
3805 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3806 noneg_PVSSRC(&(pAsm->S[0].src));
3807
3808 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3809 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3810 pAsm->S[1].src.reg = tmp3;
3811
3812 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3813 neg_PVSSRC(&(pAsm->S[1].src));
3814
3815 if( GL_FALSE == next_ins(pAsm) )
3816 {
3817 return GL_FALSE;
3818 }
3819
3820 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3821 pAsm->D.dst.math = 1;
3822
3823 if( GL_FALSE == assemble_dst(pAsm) )
3824 {
3825 return GL_FALSE;
3826 }
3827
3828 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3829
3830 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3831 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3832 pAsm->S[0].src.reg = tmp3;
3833
3834 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3835 noneg_PVSSRC(&(pAsm->S[0].src));
3836
3837 if( GL_FALSE == next_ins(pAsm) )
3838 {
3839 return GL_FALSE;
3840 }
3841
3842 // MOV dst.z, tmp2.x
3843
3844 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3845
3846 if( GL_FALSE == assemble_dst(pAsm) )
3847 {
3848 return GL_FALSE;
3849 }
3850
3851 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3852
3853 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3854 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3855 pAsm->S[0].src.reg = tmp2;
3856
3857 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3858 noneg_PVSSRC(&(pAsm->S[0].src));
3859
3860 if( GL_FALSE == next_ins(pAsm) )
3861 {
3862 return GL_FALSE;
3863 }
3864
3865 // MOV dst.w 1.0
3866
3867 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3868
3869 if( GL_FALSE == assemble_dst(pAsm) )
3870 {
3871 return GL_FALSE;
3872 }
3873
3874 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3875
3876 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3877 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3878 pAsm->S[0].src.reg = tmp1;
3879
3880 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3881 noneg_PVSSRC(&(pAsm->S[0].src));
3882
3883 if( GL_FALSE == next_ins(pAsm) )
3884 {
3885 return GL_FALSE;
3886 }
3887
3888 return GL_TRUE;
3889 }
3890
3891 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3892 {
3893 int tmp, ii;
3894 GLboolean bReplaceDst = GL_FALSE;
3895 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3896
3897 if( GL_FALSE == checkop3(pAsm) )
3898 {
3899 return GL_FALSE;
3900 }
3901
3902 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3903 pAsm->D.dst.op3 = 1;
3904
3905 tmp = (-1);
3906
3907 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3908 { /* TODO : more investigation on MAD src and dst using same register */
3909 for(ii=0; ii<3; ii++)
3910 {
3911 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3912 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3913 {
3914 bReplaceDst = GL_TRUE;
3915 break;
3916 }
3917 }
3918 }
3919 if(0xF != pILInst->DstReg.WriteMask)
3920 { /* OP3 has no support for write mask */
3921 bReplaceDst = GL_TRUE;
3922 }
3923
3924 if(GL_TRUE == bReplaceDst)
3925 {
3926 tmp = gethelpr(pAsm);
3927
3928 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3929 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3930 pAsm->D.dst.reg = tmp;
3931
3932 nomask_PVSDST(&(pAsm->D.dst));
3933 }
3934 else
3935 {
3936 if( GL_FALSE == assemble_dst(pAsm) )
3937 {
3938 return GL_FALSE;
3939 }
3940 }
3941
3942 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3943 {
3944 return GL_FALSE;
3945 }
3946
3947 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3948 {
3949 return GL_FALSE;
3950 }
3951
3952 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3953 {
3954 return GL_FALSE;
3955 }
3956
3957 if ( GL_FALSE == next_ins(pAsm) )
3958 {
3959 return GL_FALSE;
3960 }
3961
3962 if (GL_TRUE == bReplaceDst)
3963 {
3964 if( GL_FALSE == assemble_dst(pAsm) )
3965 {
3966 return GL_FALSE;
3967 }
3968
3969 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3970
3971 //tmp for source
3972 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3973 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3974 pAsm->S[0].src.reg = tmp;
3975
3976 noneg_PVSSRC(&(pAsm->S[0].src));
3977 noswizzle_PVSSRC(&(pAsm->S[0].src));
3978
3979 if( GL_FALSE == next_ins(pAsm) )
3980 {
3981 return GL_FALSE;
3982 }
3983 }
3984
3985 return GL_TRUE;
3986 }
3987
3988 /* LIT dst, src */
3989 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3990 {
3991 unsigned int dstReg;
3992 unsigned int dstType;
3993 unsigned int srcReg;
3994 unsigned int srcType;
3995 checkop1(pAsm);
3996 int tmp = gethelpr(pAsm);
3997
3998 if( GL_FALSE == assemble_dst(pAsm) )
3999 {
4000 return GL_FALSE;
4001 }
4002 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4003 {
4004 return GL_FALSE;
4005 }
4006 dstReg = pAsm->D.dst.reg;
4007 dstType = pAsm->D.dst.rtype;
4008 srcReg = pAsm->S[0].src.reg;
4009 srcType = pAsm->S[0].src.rtype;
4010
4011 /* dst.xw, <- 1.0 */
4012 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4013 pAsm->D.dst.rtype = dstType;
4014 pAsm->D.dst.reg = dstReg;
4015 pAsm->D.dst.writex = 1;
4016 pAsm->D.dst.writey = 0;
4017 pAsm->D.dst.writez = 0;
4018 pAsm->D.dst.writew = 1;
4019 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4020 pAsm->S[0].src.reg = tmp;
4021 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4022 noneg_PVSSRC(&(pAsm->S[0].src));
4023 pAsm->S[0].src.swizzlex = SQ_SEL_1;
4024 pAsm->S[0].src.swizzley = SQ_SEL_1;
4025 pAsm->S[0].src.swizzlez = SQ_SEL_1;
4026 pAsm->S[0].src.swizzlew = SQ_SEL_1;
4027 if( GL_FALSE == next_ins(pAsm) )
4028 {
4029 return GL_FALSE;
4030 }
4031
4032 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4033 {
4034 return GL_FALSE;
4035 }
4036
4037 /* dst.y = max(src.x, 0.0) */
4038 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4039 pAsm->D.dst.rtype = dstType;
4040 pAsm->D.dst.reg = dstReg;
4041 pAsm->D.dst.writex = 0;
4042 pAsm->D.dst.writey = 1;
4043 pAsm->D.dst.writez = 0;
4044 pAsm->D.dst.writew = 0;
4045 pAsm->S[0].src.rtype = srcType;
4046 pAsm->S[0].src.reg = srcReg;
4047 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4048 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4049 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4050 pAsm->S[1].src.reg = tmp;
4051 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4052 noneg_PVSSRC(&(pAsm->S[1].src));
4053 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4054 pAsm->S[1].src.swizzley = SQ_SEL_0;
4055 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4056 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4057 if( GL_FALSE == next_ins(pAsm) )
4058 {
4059 return GL_FALSE;
4060 }
4061
4062 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4063 {
4064 return GL_FALSE;
4065 }
4066
4067 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4068
4069 /* dst.z = log(src.y) */
4070 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
4071 pAsm->D.dst.math = 1;
4072 pAsm->D.dst.rtype = dstType;
4073 pAsm->D.dst.reg = dstReg;
4074 pAsm->D.dst.writex = 0;
4075 pAsm->D.dst.writey = 0;
4076 pAsm->D.dst.writez = 1;
4077 pAsm->D.dst.writew = 0;
4078 pAsm->S[0].src.rtype = srcType;
4079 pAsm->S[0].src.reg = srcReg;
4080 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4081 if( GL_FALSE == next_ins(pAsm) )
4082 {
4083 return GL_FALSE;
4084 }
4085
4086 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4087 {
4088 return GL_FALSE;
4089 }
4090
4091 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4092 {
4093 return GL_FALSE;
4094 }
4095
4096 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4097
4098 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4099
4100 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4101 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4102 pAsm->D.dst.math = 1;
4103 pAsm->D.dst.op3 = 1;
4104 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4105 pAsm->D.dst.reg = tmp;
4106 pAsm->D.dst.writex = 1;
4107 pAsm->D.dst.writey = 0;
4108 pAsm->D.dst.writez = 0;
4109 pAsm->D.dst.writew = 0;
4110
4111 pAsm->S[0].src.rtype = srcType;
4112 pAsm->S[0].src.reg = srcReg;
4113 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4114
4115 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4116 pAsm->S[1].src.reg = dstReg;
4117 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4118 noneg_PVSSRC(&(pAsm->S[1].src));
4119 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4120 pAsm->S[1].src.swizzley = SQ_SEL_Z;
4121 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4122 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4123
4124 pAsm->S[2].src.rtype = srcType;
4125 pAsm->S[2].src.reg = srcReg;
4126 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4127
4128 if( GL_FALSE == next_ins(pAsm) )
4129 {
4130 return GL_FALSE;
4131 }
4132
4133 /* dst.z = exp(tmp.x) */
4134 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4135 pAsm->D.dst.math = 1;
4136 pAsm->D.dst.rtype = dstType;
4137 pAsm->D.dst.reg = dstReg;
4138 pAsm->D.dst.writex = 0;
4139 pAsm->D.dst.writey = 0;
4140 pAsm->D.dst.writez = 1;
4141 pAsm->D.dst.writew = 0;
4142
4143 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4144 pAsm->S[0].src.reg = tmp;
4145 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4146 noneg_PVSSRC(&(pAsm->S[0].src));
4147 pAsm->S[0].src.swizzlex = SQ_SEL_X;
4148 pAsm->S[0].src.swizzley = SQ_SEL_X;
4149 pAsm->S[0].src.swizzlez = SQ_SEL_X;
4150 pAsm->S[0].src.swizzlew = SQ_SEL_X;
4151
4152 if( GL_FALSE == next_ins(pAsm) )
4153 {
4154 return GL_FALSE;
4155 }
4156
4157 return GL_TRUE;
4158 }
4159
4160 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
4161 {
4162 if( GL_FALSE == checkop2(pAsm) )
4163 {
4164 return GL_FALSE;
4165 }
4166
4167 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4168
4169 if( GL_FALSE == assemble_dst(pAsm) )
4170 {
4171 return GL_FALSE;
4172 }
4173
4174 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4175 {
4176 return GL_FALSE;
4177 }
4178
4179 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4180 {
4181 return GL_FALSE;
4182 }
4183
4184 if( GL_FALSE == next_ins(pAsm) )
4185 {
4186 return GL_FALSE;
4187 }
4188
4189 return GL_TRUE;
4190 }
4191
4192 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
4193 {
4194 if( GL_FALSE == checkop2(pAsm) )
4195 {
4196 return GL_FALSE;
4197 }
4198
4199 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
4200
4201 if( GL_FALSE == assemble_dst(pAsm) )
4202 {
4203 return GL_FALSE;
4204 }
4205
4206 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4207 {
4208 return GL_FALSE;
4209 }
4210
4211 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4212 {
4213 return GL_FALSE;
4214 }
4215
4216 if( GL_FALSE == next_ins(pAsm) )
4217 {
4218 return GL_FALSE;
4219 }
4220
4221 return GL_TRUE;
4222 }
4223
4224 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
4225 {
4226 checkop1(pAsm);
4227
4228 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4229
4230 if (GL_FALSE == assemble_dst(pAsm))
4231 {
4232 return GL_FALSE;
4233 }
4234
4235 if (GL_FALSE == assemble_src(pAsm, 0, -1))
4236 {
4237 return GL_FALSE;
4238 }
4239
4240 if ( GL_FALSE == next_ins(pAsm) )
4241 {
4242 return GL_FALSE;
4243 }
4244
4245 return GL_TRUE;
4246 }
4247
4248 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
4249 {
4250 if( GL_FALSE == checkop2(pAsm) )
4251 {
4252 return GL_FALSE;
4253 }
4254
4255 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4256
4257 if( GL_FALSE == assemble_dst(pAsm) )
4258 {
4259 return GL_FALSE;
4260 }
4261
4262 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4263 {
4264 return GL_FALSE;
4265 }
4266
4267 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4268 {
4269 return GL_FALSE;
4270 }
4271
4272 if( GL_FALSE == next_ins(pAsm) )
4273 {
4274 return GL_FALSE;
4275 }
4276
4277 return GL_TRUE;
4278 }
4279
4280 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
4281 {
4282 BITS tmp;
4283
4284 checkop1(pAsm);
4285
4286 tmp = gethelpr(pAsm);
4287
4288 // LG2 tmp.x, a.swizzle
4289 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4290 pAsm->D.dst.math = 1;
4291
4292 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4293 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4294 pAsm->D.dst.reg = tmp;
4295 nomask_PVSDST(&(pAsm->D.dst));
4296
4297 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4298 {
4299 return GL_FALSE;
4300 }
4301
4302 if( GL_FALSE == next_ins(pAsm) )
4303 {
4304 return GL_FALSE;
4305 }
4306
4307 // MUL tmp.x, tmp.x, b.swizzle
4308 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4309
4310 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4311 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4312 pAsm->D.dst.reg = tmp;
4313 nomask_PVSDST(&(pAsm->D.dst));
4314
4315 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4316 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4317 pAsm->S[0].src.reg = tmp;
4318 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4319 noneg_PVSSRC(&(pAsm->S[0].src));
4320
4321 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4322 {
4323 return GL_FALSE;
4324 }
4325
4326 if( GL_FALSE == next_ins(pAsm) )
4327 {
4328 return GL_FALSE;
4329 }
4330
4331 // EX2 dst.mask, tmp.x
4332 // EX2 tmp.x, tmp.x
4333 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4334 pAsm->D.dst.math = 1;
4335
4336 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4337 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4338 pAsm->D.dst.reg = tmp;
4339 nomask_PVSDST(&(pAsm->D.dst));
4340
4341 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4342 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4343 pAsm->S[0].src.reg = tmp;
4344 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4345 noneg_PVSSRC(&(pAsm->S[0].src));
4346
4347 if( GL_FALSE == next_ins(pAsm) )
4348 {
4349 return GL_FALSE;
4350 }
4351
4352 // Now replicate result to all necessary channels in destination
4353 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4354
4355 if( GL_FALSE == assemble_dst(pAsm) )
4356 {
4357 return GL_FALSE;
4358 }
4359
4360 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4361 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4362 pAsm->S[0].src.reg = tmp;
4363
4364 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4365 noneg_PVSSRC(&(pAsm->S[0].src));
4366
4367 if( GL_FALSE == next_ins(pAsm) )
4368 {
4369 return GL_FALSE;
4370 }
4371
4372 return GL_TRUE;
4373 }
4374
4375 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4376 {
4377 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4378 }
4379
4380 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4381 {
4382 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4383 }
4384
4385 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
4386 {
4387 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
4388 }
4389
4390 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4391 {
4392 BITS tmp;
4393
4394 checkop1(pAsm);
4395
4396 tmp = gethelpr(pAsm);
4397
4398 // COS tmp.x, a.x
4399 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4400 pAsm->D.dst.math = 1;
4401
4402 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4403 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4404 pAsm->D.dst.reg = tmp;
4405 pAsm->D.dst.writex = 1;
4406
4407 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4408 {
4409 return GL_FALSE;
4410 }
4411
4412 if ( GL_FALSE == next_ins(pAsm) )
4413 {
4414 return GL_FALSE;
4415 }
4416
4417 // SIN tmp.y, a.x
4418 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4419 pAsm->D.dst.math = 1;
4420
4421 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4422 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4423 pAsm->D.dst.reg = tmp;
4424 pAsm->D.dst.writey = 1;
4425
4426 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4427 {
4428 return GL_FALSE;
4429 }
4430
4431 if( GL_FALSE == next_ins(pAsm) )
4432 {
4433 return GL_FALSE;
4434 }
4435
4436 // MOV dst.mask, tmp
4437 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4438
4439 if( GL_FALSE == assemble_dst(pAsm) )
4440 {
4441 return GL_FALSE;
4442 }
4443
4444 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4445 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4446 pAsm->S[0].src.reg = tmp;
4447
4448 noswizzle_PVSSRC(&(pAsm->S[0].src));
4449 pAsm->S[0].src.swizzlez = SQ_SEL_0;
4450 pAsm->S[0].src.swizzlew = SQ_SEL_0;
4451
4452 if ( GL_FALSE == next_ins(pAsm) )
4453 {
4454 return GL_FALSE;
4455 }
4456
4457 return GL_TRUE;
4458 }
4459
4460 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4461 {
4462 if( GL_FALSE == checkop2(pAsm) )
4463 {
4464 return GL_FALSE;
4465 }
4466
4467 pAsm->D.dst.opcode = opcode;
4468 pAsm->D.dst.math = 1;
4469
4470 if( GL_FALSE == assemble_dst(pAsm) )
4471 {
4472 return GL_FALSE;
4473 }
4474
4475 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4476 {
4477 return GL_FALSE;
4478 }
4479
4480 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4481 {
4482 return GL_FALSE;
4483 }
4484
4485 if( GL_FALSE == next_ins(pAsm) )
4486 {
4487 return GL_FALSE;
4488 }
4489
4490 return GL_TRUE;
4491 }
4492
4493 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4494 {
4495 if( GL_FALSE == checkop2(pAsm) )
4496 {
4497 return GL_FALSE;
4498 }
4499
4500 pAsm->D.dst.opcode = opcode;
4501 pAsm->D.dst.math = 1;
4502 pAsm->D.dst.predicated = 1;
4503 pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
4504
4505 if( GL_FALSE == assemble_dst(pAsm) )
4506 {
4507 return GL_FALSE;
4508 }
4509
4510 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4511 {
4512 return GL_FALSE;
4513 }
4514
4515 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4516 {
4517 return GL_FALSE;
4518 }
4519
4520 if( GL_FALSE == next_ins2(pAsm) )
4521 {
4522 return GL_FALSE;
4523 }
4524
4525 return GL_TRUE;
4526 }
4527
4528 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4529 {
4530 if( GL_FALSE == checkop2(pAsm) )
4531 {
4532 return GL_FALSE;
4533 }
4534
4535 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4536
4537 if( GL_FALSE == assemble_dst(pAsm) )
4538 {
4539 return GL_FALSE;
4540 }
4541
4542 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4543 {
4544 return GL_FALSE;
4545 }
4546
4547 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4548 {
4549 return GL_FALSE;
4550 }
4551
4552 if( GL_FALSE == next_ins(pAsm) )
4553 {
4554 return GL_FALSE;
4555 }
4556
4557 return GL_TRUE;
4558 }
4559
4560 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4561 {
4562 if( GL_FALSE == checkop2(pAsm) )
4563 {
4564 return GL_FALSE;
4565 }
4566
4567 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4568
4569 if( GL_FALSE == assemble_dst(pAsm) )
4570 {
4571 return GL_FALSE;
4572 }
4573
4574 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4575 {
4576 return GL_FALSE;
4577 }
4578
4579 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4580 {
4581 return GL_FALSE;
4582 }
4583
4584 if( GL_FALSE == next_ins(pAsm) )
4585 {
4586 return GL_FALSE;
4587 }
4588
4589 return GL_TRUE;
4590 }
4591
4592 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4593 {
4594 return GL_TRUE;
4595 }
4596
4597 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4598 {
4599 GLboolean src_const;
4600 GLboolean need_barrier = GL_FALSE;
4601
4602 checkop1(pAsm);
4603
4604 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4605 {
4606 case PROGRAM_CONSTANT:
4607 case PROGRAM_LOCAL_PARAM:
4608 case PROGRAM_ENV_PARAM:
4609 case PROGRAM_STATE_VAR:
4610 src_const = GL_TRUE;
4611 break;
4612 case PROGRAM_TEMPORARY:
4613 case PROGRAM_INPUT:
4614 default:
4615 src_const = GL_FALSE;
4616 break;
4617 }
4618
4619 if (GL_TRUE == src_const)
4620 {
4621 if ( GL_FALSE == mov_temp(pAsm, 0) )
4622 return GL_FALSE;
4623 need_barrier = GL_TRUE;
4624 }
4625
4626 switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
4627 {
4628 case OPCODE_TEX:
4629 break;
4630 case OPCODE_TXB:
4631 radeon_error("do not support TXB yet\n");
4632 return GL_FALSE;
4633 break;
4634 case OPCODE_TXP:
4635 break;
4636 default:
4637 radeon_error("Internal error: bad texture op (not TEX)\n");
4638 return GL_FALSE;
4639 break;
4640 }
4641
4642 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4643 {
4644 GLuint tmp = gethelpr(pAsm);
4645 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4646 pAsm->D.dst.math = 1;
4647 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4648 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4649 pAsm->D.dst.reg = tmp;
4650 pAsm->D.dst.writew = 1;
4651
4652 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4653 {
4654 return GL_FALSE;
4655 }
4656 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4657 if( GL_FALSE == next_ins(pAsm) )
4658 {
4659 return GL_FALSE;
4660 }
4661
4662 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4663 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4664 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4665 pAsm->D.dst.reg = tmp;
4666 pAsm->D.dst.writex = 1;
4667 pAsm->D.dst.writey = 1;
4668 pAsm->D.dst.writez = 1;
4669 pAsm->D.dst.writew = 0;
4670
4671 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4672 {
4673 return GL_FALSE;
4674 }
4675 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4676 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4677 pAsm->S[1].src.reg = tmp;
4678 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4679
4680 if( GL_FALSE == next_ins(pAsm) )
4681 {
4682 return GL_FALSE;
4683 }
4684
4685 pAsm->aArgSubst[1] = tmp;
4686 need_barrier = GL_TRUE;
4687 }
4688
4689 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4690 {
4691 GLuint tmp1 = gethelpr(pAsm);
4692 GLuint tmp2 = gethelpr(pAsm);
4693
4694 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4695 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4696 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4697 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4698 pAsm->D.dst.reg = tmp1;
4699 nomask_PVSDST(&(pAsm->D.dst));
4700
4701 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4702 {
4703 return GL_FALSE;
4704 }
4705
4706 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4707 {
4708 return GL_FALSE;
4709 }
4710
4711 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4712 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4713
4714 if( GL_FALSE == next_ins(pAsm) )
4715 {
4716 return GL_FALSE;
4717 }
4718
4719 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
4720 * have to do explicit instruction
4721 */
4722 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4723 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4724 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4725 pAsm->D.dst.reg = tmp1;
4726 pAsm->D.dst.writez = 1;
4727
4728 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4729 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4730 pAsm->S[0].src.reg = tmp1;
4731 noswizzle_PVSSRC(&(pAsm->S[0].src));
4732 pAsm->S[1].bits = pAsm->S[0].bits;
4733 flipneg_PVSSRC(&(pAsm->S[1].src));
4734
4735 next_ins(pAsm);
4736
4737 /* tmp1.z = RCP_e(|tmp1.z|) */
4738 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4739 pAsm->D.dst.math = 1;
4740 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4741 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4742 pAsm->D.dst.reg = tmp1;
4743 pAsm->D.dst.writez = 1;
4744
4745 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4746 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4747 pAsm->S[0].src.reg = tmp1;
4748 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4749
4750 next_ins(pAsm);
4751
4752 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4753 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4754 * muladd has no writemask, have to use another temp
4755 * also no support for imm constants, so add 1 here
4756 */
4757 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4758 pAsm->D.dst.op3 = 1;
4759 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4760 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4761 pAsm->D.dst.reg = tmp2;
4762
4763 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4764 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4765 pAsm->S[0].src.reg = tmp1;
4766 noswizzle_PVSSRC(&(pAsm->S[0].src));
4767 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4768 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4769 pAsm->S[1].src.reg = tmp1;
4770 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4771 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4772 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4773 pAsm->S[2].src.reg = tmp1;
4774 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
4775
4776 next_ins(pAsm);
4777
4778 /* ADD the remaining .5 */
4779 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4780 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4781 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4782 pAsm->D.dst.reg = tmp2;
4783 pAsm->D.dst.writex = 1;
4784 pAsm->D.dst.writey = 1;
4785 pAsm->D.dst.writez = 0;
4786 pAsm->D.dst.writew = 0;
4787
4788 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4789 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4790 pAsm->S[0].src.reg = tmp2;
4791 noswizzle_PVSSRC(&(pAsm->S[0].src));
4792 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4793 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4794 pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
4795 noswizzle_PVSSRC(&(pAsm->S[1].src));
4796
4797 next_ins(pAsm);
4798
4799 /* tmp1.xy = temp2.xy */
4800 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4801 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4802 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4803 pAsm->D.dst.reg = tmp1;
4804 pAsm->D.dst.writex = 1;
4805 pAsm->D.dst.writey = 1;
4806 pAsm->D.dst.writez = 0;
4807 pAsm->D.dst.writew = 0;
4808
4809 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4810 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4811 pAsm->S[0].src.reg = tmp2;
4812 noswizzle_PVSSRC(&(pAsm->S[0].src));
4813
4814 next_ins(pAsm);
4815 pAsm->aArgSubst[1] = tmp1;
4816 need_barrier = GL_TRUE;
4817
4818 }
4819
4820 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4821 pAsm->is_tex = GL_TRUE;
4822 if ( GL_TRUE == need_barrier )
4823 {
4824 pAsm->need_tex_barrier = GL_TRUE;
4825 }
4826 // Set src1 to tex unit id
4827 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
4828 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4829
4830 //No sw info from mesa compiler, so hard code here.
4831 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4832 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4833 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4834 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4835
4836 if( GL_FALSE == tex_dst(pAsm) )
4837 {
4838 return GL_FALSE;
4839 }
4840
4841 if( GL_FALSE == tex_src(pAsm) )
4842 {
4843 return GL_FALSE;
4844 }
4845
4846 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4847 {
4848 /* hopefully did swizzles before */
4849 noswizzle_PVSSRC(&(pAsm->S[0].src));
4850 }
4851
4852 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4853 {
4854 /* SAMPLE dst, tmp.yxwy, CUBE */
4855 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4856 pAsm->S[0].src.swizzley = SQ_SEL_X;
4857 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4858 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4859 }
4860
4861 if ( GL_FALSE == next_ins(pAsm) )
4862 {
4863 return GL_FALSE;
4864 }
4865
4866 return GL_TRUE;
4867 }
4868
4869 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4870 {
4871 BITS tmp;
4872
4873 if( GL_FALSE == checkop2(pAsm) )
4874 {
4875 return GL_FALSE;
4876 }
4877
4878 tmp = gethelpr(pAsm);
4879
4880 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4881
4882 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4883 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4884 pAsm->D.dst.reg = tmp;
4885 nomask_PVSDST(&(pAsm->D.dst));
4886
4887 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4888 {
4889 return GL_FALSE;
4890 }
4891
4892 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4893 {
4894 return GL_FALSE;
4895 }
4896
4897 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4898 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4899
4900 if( GL_FALSE == next_ins(pAsm) )
4901 {
4902 return GL_FALSE;
4903 }
4904
4905 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4906 pAsm->D.dst.op3 = 1;
4907
4908 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4909 {
4910 tmp = gethelpr(pAsm);
4911
4912 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4913 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4914 pAsm->D.dst.reg = tmp;
4915
4916 nomask_PVSDST(&(pAsm->D.dst));
4917 }
4918 else
4919 {
4920 if( GL_FALSE == assemble_dst(pAsm) )
4921 {
4922 return GL_FALSE;
4923 }
4924 }
4925
4926 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4927 {
4928 return GL_FALSE;
4929 }
4930
4931 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4932 {
4933 return GL_FALSE;
4934 }
4935
4936 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4937 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4938
4939 // result1 + (neg) result0
4940 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4941 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4942 pAsm->S[2].src.reg = tmp;
4943
4944 neg_PVSSRC(&(pAsm->S[2].src));
4945 noswizzle_PVSSRC(&(pAsm->S[2].src));
4946
4947 if( GL_FALSE == next_ins(pAsm) )
4948 {
4949 return GL_FALSE;
4950 }
4951
4952
4953 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4954 {
4955 if( GL_FALSE == assemble_dst(pAsm) )
4956 {
4957 return GL_FALSE;
4958 }
4959
4960 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4961
4962 // Use tmp as source
4963 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4964 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4965 pAsm->S[0].src.reg = tmp;
4966
4967 noneg_PVSSRC(&(pAsm->S[0].src));
4968 noswizzle_PVSSRC(&(pAsm->S[0].src));
4969
4970 if( GL_FALSE == next_ins(pAsm) )
4971 {
4972 return GL_FALSE;
4973 }
4974 }
4975
4976 return GL_TRUE;
4977 }
4978
4979 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4980 {
4981 return GL_TRUE;
4982 }
4983
4984 inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason)
4985 {
4986 switch (uReason)
4987 {
4988 case FC_PUSH_VPM:
4989 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++;
4990 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current++;
4991 break;
4992 case FC_PUSH_WQM:
4993 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs++;
4994 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.current += 4;
4995 break;
4996 case FC_LOOP:
4997 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 4;
4998 break;
4999 case FC_REP:
5000 /* TODO : for 16 vp asic, should += 2; */
5001 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs += 1;
5002 break;
5003 };
5004
5005 if(pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs
5006 > pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max)
5007 {
5008 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.max =
5009 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.su.pushs;
5010 }
5011 }
5012
5013 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
5014 {
5015 if(GL_FALSE == add_cf_instruction(pAsm) )
5016 {
5017 return GL_FALSE;
5018 }
5019
5020 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5021 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5022 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5023
5024 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5025 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5026 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5027 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5028
5029 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5030
5031 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
5032
5033 return GL_TRUE;
5034 }
5035
5036 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5037 {
5038 if(GL_FALSE == add_cf_instruction(pAsm) )
5039 {
5040 return GL_FALSE;
5041 }
5042
5043 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5044 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5045 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5046
5047 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5048 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5049 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5050
5051 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5052
5053 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5054 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5055
5056 return GL_TRUE;
5057 }
5058
5059 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
5060 {
5061 if(GL_FALSE == add_cf_instruction(pAsm) )
5062 {
5063 return GL_FALSE;
5064 }
5065
5066 if(GL_TRUE != bHasElse)
5067 {
5068 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5069 }
5070 else
5071 {
5072 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5073 }
5074 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5075 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5076
5077 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5078 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5079 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5080 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5081
5082 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5083
5084 pAsm->FCSP++;
5085 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
5086 pAsm->fc_stack[pAsm->FCSP].bpush = 0;
5087 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5088 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5089 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5090
5091 #ifndef USE_CF_FOR_POP_AFTER
5092 if(GL_TRUE != bHasElse)
5093 {
5094 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5095 }
5096 #endif /* USE_CF_FOR_POP_AFTER */
5097
5098 pAsm->branch_depth++;
5099
5100 if(pAsm->branch_depth > pAsm->max_branch_depth)
5101 {
5102 pAsm->max_branch_depth = pAsm->branch_depth;
5103 }
5104 return GL_TRUE;
5105 }
5106
5107 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5108 {
5109 if(GL_FALSE == add_cf_instruction(pAsm) )
5110 {
5111 return GL_FALSE;
5112 }
5113
5114 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
5115 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5116 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5117
5118 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5119 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5120 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
5121 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5122
5123 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5124
5125 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5126 0,
5127 sizeof(R700ControlFlowGenericClause *) );
5128 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5129 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5130
5131 #ifndef USE_CF_FOR_POP_AFTER
5132 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5133 #endif /* USE_CF_FOR_POP_AFTER */
5134
5135 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
5136
5137 return GL_TRUE;
5138 }
5139
5140 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5141 {
5142 #ifdef USE_CF_FOR_POP_AFTER
5143 pops(pAsm, 1);
5144 #endif /* USE_CF_FOR_POP_AFTER */
5145
5146 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5147
5148 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5149 {
5150 /* no else in between */
5151 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5152 }
5153 else
5154 {
5155 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5156 }
5157
5158 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5159 {
5160 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5161 }
5162
5163 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5164 {
5165 radeon_error("if/endif in shader code are not paired. \n");
5166 return GL_FALSE;
5167 }
5168 pAsm->branch_depth--;
5169 pAsm->FCSP--;
5170
5171 return GL_TRUE;
5172 }
5173
5174 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5175 {
5176 if(GL_FALSE == add_cf_instruction(pAsm) )
5177 {
5178 return GL_FALSE;
5179 }
5180
5181
5182 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5183 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5184 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5185
5186 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5187 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5188 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
5189 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5190
5191 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5192
5193 pAsm->FCSP++;
5194 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
5195 pAsm->fc_stack[pAsm->FCSP].bpush = 1;
5196 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5197 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
5198 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
5199 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5200
5201 pAsm->branch_depth++;
5202
5203 if(pAsm->branch_depth > pAsm->max_branch_depth)
5204 {
5205 pAsm->max_branch_depth = pAsm->branch_depth;
5206 }
5207 return GL_TRUE;
5208 }
5209
5210 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
5211 {
5212 #ifdef USE_CF_FOR_CONTINUE_BREAK
5213 unsigned int unFCSP;
5214 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5215 {
5216 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5217 {
5218 break;
5219 }
5220 }
5221 if(0 == FC_LOOP)
5222 {
5223 radeon_error("Break is not inside loop/endloop pair.\n");
5224 return GL_FALSE;
5225 }
5226
5227 if(GL_FALSE == add_cf_instruction(pAsm) )
5228 {
5229 return GL_FALSE;
5230 }
5231
5232
5233 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5234 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5235 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5236
5237 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5238 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5239 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5240
5241 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5242
5243 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5244
5245 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5246 (void *)pAsm->fc_stack[unFCSP].mid,
5247 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5248 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5249 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5250 pAsm->fc_stack[unFCSP].unNumMid++;
5251
5252 if(GL_FALSE == add_cf_instruction(pAsm) )
5253 {
5254 return GL_FALSE;
5255 }
5256
5257 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5258 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5259 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5260
5261 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5262 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5263 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5264
5265 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5266
5267 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5268 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5269
5270 #endif //USE_CF_FOR_CONTINUE_BREAK
5271 return GL_TRUE;
5272 }
5273
5274 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
5275 {
5276 #ifdef USE_CF_FOR_CONTINUE_BREAK
5277 unsigned int unFCSP;
5278 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5279 {
5280 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5281 {
5282 break;
5283 }
5284 }
5285 if(0 == FC_LOOP)
5286 {
5287 radeon_error("Continue is not inside loop/endloop pair.\n");
5288 return GL_FALSE;
5289 }
5290
5291 if(GL_FALSE == add_cf_instruction(pAsm) )
5292 {
5293 return GL_FALSE;
5294 }
5295
5296
5297 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5298 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5299 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5300
5301 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5302 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5303 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
5304
5305 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5306
5307 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5308
5309 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5310 (void *)pAsm->fc_stack[unFCSP].mid,
5311 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5312 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5313 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5314 pAsm->fc_stack[unFCSP].unNumMid++;
5315
5316 if(GL_FALSE == add_cf_instruction(pAsm) )
5317 {
5318 return GL_FALSE;
5319 }
5320
5321 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5322 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5323 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5324
5325 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5326 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5327 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5328
5329 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5330
5331 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5332 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5333
5334 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5335
5336 return GL_TRUE;
5337 }
5338
5339 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
5340 {
5341 GLuint i;
5342
5343 if(GL_FALSE == add_cf_instruction(pAsm) )
5344 {
5345 return GL_FALSE;
5346 }
5347
5348
5349 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5350 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5351 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5352
5353 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5354 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5355 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
5356 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5357
5358 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5359
5360 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
5361 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5362
5363 #ifdef USE_CF_FOR_CONTINUE_BREAK
5364 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
5365 {
5366 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
5367 }
5368 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5369 {
5370 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5371 }
5372 #endif
5373
5374 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
5375 {
5376 radeon_error("loop/endloop in shader code are not paired. \n");
5377 return GL_FALSE;
5378 }
5379
5380 unsigned int unFCSP = 0;
5381 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
5382 {
5383 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5384 {
5385 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5386 {
5387 break;
5388 }
5389 }
5390 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
5391 {
5392 unFCSP = 0;
5393
5394 returnOnFlag(pAsm);
5395 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
5396 }
5397 }
5398
5399 pAsm->branch_depth--;
5400 pAsm->FCSP--;
5401
5402 if(unFCSP > 0)
5403 {
5404 breakLoopOnFlag(pAsm, unFCSP);
5405 }
5406
5407 return GL_TRUE;
5408 }
5409
5410 void add_return_inst(r700_AssemblerBase *pAsm)
5411 {
5412 if(GL_FALSE == add_cf_instruction(pAsm) )
5413 {
5414 return GL_FALSE;
5415 }
5416 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5417 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5418 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5419 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5420
5421 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5422 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5423 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
5424 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5425
5426 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5427 }
5428
5429 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
5430 {
5431 /* Put in sub */
5432 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
5433 {
5434 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
5435 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
5436 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
5437 if(NULL == pAsm->subs)
5438 {
5439 return GL_FALSE;
5440 }
5441 pAsm->unSubArraySize += 10;
5442 }
5443
5444 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
5445 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
5446 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
5447 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
5448
5449 pAsm->CALLSP++;
5450 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
5451 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5452 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5453 pAsm->CALLSTACK[pAsm->CALLSP].stackUsage.bits = 0;
5454 SetActiveCFlist(pAsm->pR700Shader,
5455 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5456
5457 pAsm->unSubArrayPointer++;
5458
5459 /* start sub */
5460 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5461
5462 return GL_TRUE;
5463 }
5464
5465 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5466 {
5467 pAsm->CALLSP--;
5468 SetActiveCFlist(pAsm->pR700Shader,
5469 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5470
5471 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5472
5473 return GL_TRUE;
5474 }
5475
5476 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5477 {
5478 if(pAsm->CALLSP > 0)
5479 { /* in sub */
5480 unsigned int unFCSP;
5481 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5482 {
5483 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5484 {
5485 setRetInLoopFlag(pAsm, SQ_SEL_1);
5486 breakLoopOnFlag(pAsm, unFCSP);
5487 pAsm->unCFflags |= LOOPRET_FLAGS;
5488
5489 return GL_TRUE;
5490 }
5491 }
5492 }
5493
5494 add_return_inst(pAsm);
5495
5496 return GL_TRUE;
5497 }
5498
5499 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5500 GLint nILindex,
5501 GLuint uiNumberInsts,
5502 struct prog_instruction *pILInst)
5503 {
5504 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5505
5506 if(GL_FALSE == add_cf_instruction(pAsm) )
5507 {
5508 return GL_FALSE;
5509 }
5510
5511 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5512 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5513 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5514 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5515
5516 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5517 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5518 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5519 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5520
5521 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5522
5523 /* Put in caller */
5524 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5525 {
5526 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5527 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5528 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5529 if(NULL == pAsm->callers)
5530 {
5531 return GL_FALSE;
5532 }
5533 pAsm->unCallerArraySize += 10;
5534 }
5535
5536 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
5537 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5538
5539 pAsm->unCallerArrayPointer++;
5540
5541 int j;
5542 for(j=0; j<pAsm->unSubArrayPointer; j++)
5543 {
5544 if(nILindex == pAsm->subs[j].subIL_Offset)
5545 { /* compiled before */
5546 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5547 return GL_TRUE;
5548 }
5549 }
5550
5551 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5552
5553 return AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
5554 }
5555
5556 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5557 {
5558 GLfloat fLiteral[2] = {0.1, 0.0};
5559
5560 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5561 pAsm->D.dst.op3 = 0;
5562 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5563 pAsm->D.dst.reg = pAsm->flag_reg_index;
5564 pAsm->D.dst.writex = 1;
5565 pAsm->D.dst.writey = 0;
5566 pAsm->D.dst.writez = 0;
5567 pAsm->D.dst.writew = 0;
5568 pAsm->D2.dst2.literal = 1;
5569 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5570 pAsm->D.dst.predicated = 0;
5571 #if 0
5572 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5573 //pAsm->S[0].src.reg = 0;
5574 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5575 noneg_PVSSRC(&(pAsm->S[0].src));
5576 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5577 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5578 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5579 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5580
5581 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5582 {
5583 return GL_FALSE;
5584 }
5585 #else
5586 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5587 pAsm->S[0].src.reg = 0;
5588 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5589 noneg_PVSSRC(&(pAsm->S[0].src));
5590 pAsm->S[0].src.swizzlex = flagValue;
5591 pAsm->S[0].src.swizzley = flagValue;
5592 pAsm->S[0].src.swizzlez = flagValue;
5593 pAsm->S[0].src.swizzlew = flagValue;
5594
5595 if( GL_FALSE == next_ins2(pAsm) )
5596 {
5597 return GL_FALSE;
5598 }
5599 #endif
5600
5601 return GL_TRUE;
5602 }
5603
5604 GLboolean testFlag(r700_AssemblerBase *pAsm)
5605 {
5606 GLfloat fLiteral[2] = {0.1, 0.0};
5607
5608 //Test flag
5609 GLuint tmp = gethelpr(pAsm);
5610 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5611
5612 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5613 pAsm->D.dst.math = 1;
5614 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5615 pAsm->D.dst.reg = tmp;
5616 pAsm->D.dst.writex = 1;
5617 pAsm->D.dst.writey = 0;
5618 pAsm->D.dst.writez = 0;
5619 pAsm->D.dst.writew = 0;
5620 pAsm->D2.dst2.literal = 1;
5621 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5622 pAsm->D.dst.predicated = 1;
5623
5624 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5625 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5626 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5627 noneg_PVSSRC(&(pAsm->S[0].src));
5628 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5629 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5630 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5631 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5632 #if 0
5633 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5634 //pAsm->S[1].src.reg = 0;
5635 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5636 noneg_PVSSRC(&(pAsm->S[1].src));
5637 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5638 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5639 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5640 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5641
5642 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5643 {
5644 return GL_FALSE;
5645 }
5646 #else
5647 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5648 pAsm->S[1].src.reg = 0;
5649 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5650 noneg_PVSSRC(&(pAsm->S[1].src));
5651 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5652 pAsm->S[1].src.swizzley = SQ_SEL_1;
5653 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5654 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5655
5656 if( GL_FALSE == next_ins2(pAsm) )
5657 {
5658 return GL_FALSE;
5659 }
5660 #endif
5661
5662 checkStackDepth(pAsm, FC_PUSH_VPM);
5663
5664 return GL_TRUE;
5665 }
5666
5667 GLboolean returnOnFlag(r700_AssemblerBase *pAsm)
5668 {
5669 testFlag(pAsm);
5670 jumpToOffest(pAsm, 1, 4);
5671 setRetInLoopFlag(pAsm, SQ_SEL_0);
5672 pops(pAsm, 1);
5673 add_return_inst(pAsm);
5674
5675 return GL_TRUE;
5676 }
5677
5678 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5679 {
5680 testFlag(pAsm);
5681
5682 //break
5683 if(GL_FALSE == add_cf_instruction(pAsm) )
5684 {
5685 return GL_FALSE;
5686 }
5687
5688 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5689 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5690 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5691
5692 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5693 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5694 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5695 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5696
5697 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5698
5699 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5700 (void *)pAsm->fc_stack[unFCSP].mid,
5701 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5702 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5703 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5704 pAsm->fc_stack[unFCSP].unNumMid++;
5705
5706 pops(pAsm, 1);
5707
5708 return GL_TRUE;
5709 }
5710
5711 GLboolean AssembleInstr(GLuint uiFirstInst,
5712 GLuint uiNumberInsts,
5713 struct prog_instruction *pILInst,
5714 r700_AssemblerBase *pR700AsmCode)
5715 {
5716 GLuint i;
5717
5718 pR700AsmCode->pILInst = pILInst;
5719 for(i=uiFirstInst; i<uiNumberInsts; i++)
5720 {
5721 pR700AsmCode->uiCurInst = i;
5722
5723 #ifndef USE_CF_FOR_CONTINUE_BREAK
5724 if(OPCODE_BRK == pILInst[i+1].Opcode)
5725 {
5726 switch(pILInst[i].Opcode)
5727 {
5728 case OPCODE_SLE:
5729 pILInst[i].Opcode = OPCODE_SGT;
5730 break;
5731 case OPCODE_SLT:
5732 pILInst[i].Opcode = OPCODE_SGE;
5733 break;
5734 case OPCODE_SGE:
5735 pILInst[i].Opcode = OPCODE_SLT;
5736 break;
5737 case OPCODE_SGT:
5738 pILInst[i].Opcode = OPCODE_SLE;
5739 break;
5740 case OPCODE_SEQ:
5741 pILInst[i].Opcode = OPCODE_SNE;
5742 break;
5743 case OPCODE_SNE:
5744 pILInst[i].Opcode = OPCODE_SEQ;
5745 break;
5746 default:
5747 break;
5748 }
5749 }
5750 #endif
5751
5752 switch (pILInst[i].Opcode)
5753 {
5754 case OPCODE_ABS:
5755 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5756 return GL_FALSE;
5757 break;
5758 case OPCODE_ADD:
5759 case OPCODE_SUB:
5760 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5761 return GL_FALSE;
5762 break;
5763
5764 case OPCODE_ARL:
5765 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5766 return GL_FALSE;
5767 break;
5768 case OPCODE_ARR:
5769 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5770 //if ( GL_FALSE == assemble_BAD("ARR") )
5771 return GL_FALSE;
5772 break;
5773
5774 case OPCODE_CMP:
5775 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5776 return GL_FALSE;
5777 break;
5778 case OPCODE_COS:
5779 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
5780 return GL_FALSE;
5781 break;
5782
5783 case OPCODE_DP3:
5784 case OPCODE_DP4:
5785 case OPCODE_DPH:
5786 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5787 return GL_FALSE;
5788 break;
5789
5790 case OPCODE_DST:
5791 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5792 return GL_FALSE;
5793 break;
5794
5795 case OPCODE_EX2:
5796 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5797 return GL_FALSE;
5798 break;
5799 case OPCODE_EXP:
5800 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5801 return GL_FALSE;
5802 break;
5803
5804 case OPCODE_FLR:
5805 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5806 return GL_FALSE;
5807 break;
5808 //case OP_FLR_INT:
5809 // if ( GL_FALSE == assemble_FLR_INT() )
5810 // return GL_FALSE;
5811 // break;
5812
5813 case OPCODE_FRC:
5814 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5815 return GL_FALSE;
5816 break;
5817
5818 case OPCODE_KIL:
5819 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
5820 return GL_FALSE;
5821 break;
5822 case OPCODE_LG2:
5823 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5824 return GL_FALSE;
5825 break;
5826 case OPCODE_LIT:
5827 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5828 return GL_FALSE;
5829 break;
5830 case OPCODE_LRP:
5831 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5832 return GL_FALSE;
5833 break;
5834 case OPCODE_LOG:
5835 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5836 return GL_FALSE;
5837 break;
5838
5839 case OPCODE_MAD:
5840 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5841 return GL_FALSE;
5842 break;
5843 case OPCODE_MAX:
5844 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5845 return GL_FALSE;
5846 break;
5847 case OPCODE_MIN:
5848 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5849 return GL_FALSE;
5850 break;
5851
5852 case OPCODE_MOV:
5853 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5854 return GL_FALSE;
5855 break;
5856 case OPCODE_MUL:
5857 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5858 return GL_FALSE;
5859 break;
5860
5861 case OPCODE_POW:
5862 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5863 return GL_FALSE;
5864 break;
5865 case OPCODE_RCP:
5866 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5867 return GL_FALSE;
5868 break;
5869 case OPCODE_RSQ:
5870 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5871 return GL_FALSE;
5872 break;
5873 case OPCODE_SIN:
5874 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
5875 return GL_FALSE;
5876 break;
5877 case OPCODE_SCS:
5878 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5879 return GL_FALSE;
5880 break;
5881
5882 case OPCODE_SEQ:
5883 if(OPCODE_IF == pILInst[i+1].Opcode)
5884 {
5885 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5886 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5887 {
5888 return GL_FALSE;
5889 }
5890 }
5891 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5892 {
5893 #ifdef USE_CF_FOR_CONTINUE_BREAK
5894 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5895 #else
5896 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5897 #endif
5898 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5899 {
5900 return GL_FALSE;
5901 }
5902 }
5903 else if(OPCODE_CONT == pILInst[i+1].Opcode)
5904 {
5905 #ifdef USE_CF_FOR_CONTINUE_BREAK
5906 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5907 #else
5908 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
5909 #endif
5910 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5911 {
5912 return GL_FALSE;
5913 }
5914 }
5915 else
5916 {
5917 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
5918 {
5919 return GL_FALSE;
5920 }
5921 }
5922 break;
5923
5924 case OPCODE_SGT:
5925 if(OPCODE_IF == pILInst[i+1].Opcode)
5926 {
5927 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5928 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5929 {
5930 return GL_FALSE;
5931 }
5932 }
5933 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5934 {
5935 #ifdef USE_CF_FOR_CONTINUE_BREAK
5936 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5937 #else
5938 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5939 #endif
5940 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5941 {
5942 return GL_FALSE;
5943 }
5944 }
5945 else if(OPCODE_CONT == pILInst[i+1].Opcode)
5946 {
5947 #ifdef USE_CF_FOR_CONTINUE_BREAK
5948 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5949 #else
5950 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
5951 #endif
5952
5953 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
5954 {
5955 return GL_FALSE;
5956 }
5957 }
5958 else
5959 {
5960 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
5961 {
5962 return GL_FALSE;
5963 }
5964 }
5965 break;
5966
5967 case OPCODE_SGE:
5968 if(OPCODE_IF == pILInst[i+1].Opcode)
5969 {
5970 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5971 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
5972 {
5973 return GL_FALSE;
5974 }
5975 }
5976 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5977 {
5978 #ifdef USE_CF_FOR_CONTINUE_BREAK
5979 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5980 #else
5981 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5982 #endif
5983 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
5984 {
5985 return GL_FALSE;
5986 }
5987 }
5988 else if(OPCODE_CONT == pILInst[i+1].Opcode)
5989 {
5990 #ifdef USE_CF_FOR_CONTINUE_BREAK
5991 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5992 #else
5993 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
5994 #endif
5995
5996 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
5997 {
5998 return GL_FALSE;
5999 }
6000 }
6001 else
6002 {
6003 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
6004 {
6005 return GL_FALSE;
6006 }
6007 }
6008 break;
6009
6010 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
6011 case OPCODE_SLT:
6012 {
6013 struct prog_src_register SrcRegSave[2];
6014 SrcRegSave[0] = pILInst[i].SrcReg[0];
6015 SrcRegSave[1] = pILInst[i].SrcReg[1];
6016 pILInst[i].SrcReg[0] = SrcRegSave[1];
6017 pILInst[i].SrcReg[1] = SrcRegSave[0];
6018 if(OPCODE_IF == pILInst[i+1].Opcode)
6019 {
6020 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6021 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6022 {
6023 pILInst[i].SrcReg[0] = SrcRegSave[0];
6024 pILInst[i].SrcReg[1] = SrcRegSave[1];
6025 return GL_FALSE;
6026 }
6027 }
6028 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6029 {
6030 #ifdef USE_CF_FOR_CONTINUE_BREAK
6031 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6032 #else
6033 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6034 #endif
6035 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6036 {
6037 pILInst[i].SrcReg[0] = SrcRegSave[0];
6038 pILInst[i].SrcReg[1] = SrcRegSave[1];
6039 return GL_FALSE;
6040 }
6041 }
6042 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6043 {
6044 #ifdef USE_CF_FOR_CONTINUE_BREAK
6045 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6046 #else
6047 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6048 #endif
6049
6050 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6051 {
6052 pILInst[i].SrcReg[0] = SrcRegSave[0];
6053 pILInst[i].SrcReg[1] = SrcRegSave[1];
6054 return GL_FALSE;
6055 }
6056 }
6057 else
6058 {
6059 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
6060 {
6061 pILInst[i].SrcReg[0] = SrcRegSave[0];
6062 pILInst[i].SrcReg[1] = SrcRegSave[1];
6063 return GL_FALSE;
6064 }
6065 }
6066 pILInst[i].SrcReg[0] = SrcRegSave[0];
6067 pILInst[i].SrcReg[1] = SrcRegSave[1];
6068 }
6069 break;
6070
6071 case OPCODE_SLE:
6072 {
6073 struct prog_src_register SrcRegSave[2];
6074 SrcRegSave[0] = pILInst[i].SrcReg[0];
6075 SrcRegSave[1] = pILInst[i].SrcReg[1];
6076 pILInst[i].SrcReg[0] = SrcRegSave[1];
6077 pILInst[i].SrcReg[1] = SrcRegSave[0];
6078 if(OPCODE_IF == pILInst[i+1].Opcode)
6079 {
6080 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6081 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6082 {
6083 pILInst[i].SrcReg[0] = SrcRegSave[0];
6084 pILInst[i].SrcReg[1] = SrcRegSave[1];
6085 return GL_FALSE;
6086 }
6087 }
6088 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6089 {
6090 #ifdef USE_CF_FOR_CONTINUE_BREAK
6091 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6092 #else
6093 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6094 #endif
6095 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6096 {
6097 pILInst[i].SrcReg[0] = SrcRegSave[0];
6098 pILInst[i].SrcReg[1] = SrcRegSave[1];
6099 return GL_FALSE;
6100 }
6101 }
6102 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6103 {
6104 #ifdef USE_CF_FOR_CONTINUE_BREAK
6105 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6106 #else
6107 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6108 #endif
6109
6110 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6111 {
6112 pILInst[i].SrcReg[0] = SrcRegSave[0];
6113 pILInst[i].SrcReg[1] = SrcRegSave[1];
6114 return GL_FALSE;
6115 }
6116 }
6117 else
6118 {
6119 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
6120 {
6121 pILInst[i].SrcReg[0] = SrcRegSave[0];
6122 pILInst[i].SrcReg[1] = SrcRegSave[1];
6123 return GL_FALSE;
6124 }
6125 }
6126 pILInst[i].SrcReg[0] = SrcRegSave[0];
6127 pILInst[i].SrcReg[1] = SrcRegSave[1];
6128 }
6129 break;
6130
6131 case OPCODE_SNE:
6132 if(OPCODE_IF == pILInst[i+1].Opcode)
6133 {
6134 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6135 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6136 {
6137 return GL_FALSE;
6138 }
6139 }
6140 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6141 {
6142 #ifdef USE_CF_FOR_CONTINUE_BREAK
6143 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6144 #else
6145 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6146 #endif
6147 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6148 {
6149 return GL_FALSE;
6150 }
6151 }
6152 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6153 {
6154 #ifdef USE_CF_FOR_CONTINUE_BREAK
6155 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6156 #else
6157 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6158 #endif
6159 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6160 {
6161 return GL_FALSE;
6162 }
6163 }
6164 else
6165 {
6166 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
6167 {
6168 return GL_FALSE;
6169 }
6170 }
6171 break;
6172
6173 //case OP_STP:
6174 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
6175 // return GL_FALSE;
6176 // break;
6177
6178 case OPCODE_SWZ:
6179 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
6180 {
6181 return GL_FALSE;
6182 }
6183 else
6184 {
6185 if( (i+1)<uiNumberInsts )
6186 {
6187 if(OPCODE_END != pILInst[i+1].Opcode)
6188 {
6189 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
6190 {
6191 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
6192 }
6193 }
6194 }
6195 }
6196 break;
6197
6198 case OPCODE_TEX:
6199 case OPCODE_TXB:
6200 case OPCODE_TXP:
6201 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
6202 return GL_FALSE;
6203 break;
6204
6205 case OPCODE_XPD:
6206 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
6207 return GL_FALSE;
6208 break;
6209
6210 case OPCODE_IF :
6211 {
6212 GLboolean bHasElse = GL_FALSE;
6213
6214 if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
6215 {
6216 bHasElse = GL_TRUE;
6217 }
6218
6219 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
6220 {
6221 return GL_FALSE;
6222 }
6223 }
6224 break;
6225
6226 case OPCODE_ELSE :
6227 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
6228 return GL_FALSE;
6229 break;
6230
6231 case OPCODE_ENDIF:
6232 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
6233 return GL_FALSE;
6234 break;
6235
6236 case OPCODE_BGNLOOP:
6237 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
6238 {
6239 return GL_FALSE;
6240 }
6241 break;
6242
6243 case OPCODE_BRK:
6244 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
6245 {
6246 return GL_FALSE;
6247 }
6248 break;
6249
6250 case OPCODE_CONT:
6251 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
6252 {
6253 return GL_FALSE;
6254 }
6255 break;
6256
6257 case OPCODE_ENDLOOP:
6258 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
6259 {
6260 return GL_FALSE;
6261 }
6262 break;
6263
6264 case OPCODE_BGNSUB:
6265 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
6266 {
6267 return GL_FALSE;
6268 }
6269 break;
6270
6271 case OPCODE_RET:
6272 if( GL_FALSE == assemble_RET(pR700AsmCode) )
6273 {
6274 return GL_FALSE;
6275 }
6276 break;
6277
6278 case OPCODE_CAL:
6279 if( GL_FALSE == assemble_CAL(pR700AsmCode,
6280 pILInst[i].BranchTarget,
6281 uiNumberInsts,
6282 pILInst) )
6283 {
6284 return GL_FALSE;
6285 }
6286 break;
6287
6288 //case OPCODE_EXPORT:
6289 // if ( GL_FALSE == assemble_EXPORT() )
6290 // return GL_FALSE;
6291 // break;
6292
6293 case OPCODE_ENDSUB:
6294 return assemble_ENDSUB(pR700AsmCode);
6295
6296 case OPCODE_END:
6297 //pR700AsmCode->uiCurInst = i;
6298 //This is to remaind that if in later exoort there is depth/stencil
6299 //export, we need a mov to re-arrange DST channel, where using a
6300 //psuedo inst, we will use this end inst to do it.
6301 return GL_TRUE;
6302
6303 default:
6304 radeon_error("internal: unknown instruction\n");
6305 return GL_FALSE;
6306 }
6307 }
6308
6309 return GL_TRUE;
6310 }
6311
6312 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
6313 {
6314 setRetInLoopFlag(pAsm, SQ_SEL_0);
6315 return GL_TRUE;
6316 }
6317
6318 GLboolean RelocProgram(r700_AssemblerBase * pAsm)
6319 {
6320 GLuint i;
6321 GLuint unCFoffset;
6322 TypedShaderList * plstCFmain;
6323 TypedShaderList * plstCFsub;
6324
6325 R700ShaderInstruction * pInst;
6326 R700ControlFlowGenericClause * pCFInst;
6327
6328 if(0 == pAsm->unSubArrayPointer)
6329 {
6330 return GL_TRUE;
6331 }
6332
6333 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
6334 unCFoffset = plstCFmain->uNumOfNode;
6335
6336 /* Reloc subs */
6337 for(i=0; i<pAsm->unSubArrayPointer; i++)
6338 {
6339 pAsm->subs[i].unCFoffset = unCFoffset;
6340 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
6341
6342 pInst = plstCFsub->pHead;
6343
6344 /* reloc instructions */
6345 while(pInst)
6346 {
6347 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
6348 {
6349 pCFInst = (R700ControlFlowGenericClause *)pInst;
6350
6351 switch (pCFInst->m_Word1.f.cf_inst)
6352 {
6353 case SQ_CF_INST_POP:
6354 case SQ_CF_INST_JUMP:
6355 case SQ_CF_INST_ELSE:
6356 case SQ_CF_INST_LOOP_END:
6357 case SQ_CF_INST_LOOP_START:
6358 case SQ_CF_INST_LOOP_START_NO_AL:
6359 case SQ_CF_INST_LOOP_CONTINUE:
6360 case SQ_CF_INST_LOOP_BREAK:
6361 pCFInst->m_Word0.f.addr += unCFoffset;
6362 break;
6363 default:
6364 break;
6365 }
6366 }
6367
6368 pInst->m_uIndex += unCFoffset;
6369
6370 pInst = pInst->pNextInst;
6371 };
6372
6373 /* Put sub into main */
6374 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
6375 plstCFmain->pTail = plstCFsub->pTail;
6376 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
6377
6378 unCFoffset += plstCFsub->uNumOfNode;
6379 }
6380
6381 /* reloc callers */
6382 for(i=0; i<pAsm->unCallerArrayPointer; i++)
6383 {
6384 pAsm->callers[i].cf_ptr->m_Word0.f.addr
6385 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
6386 }
6387
6388 /* remove flags init if they are not used */
6389 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
6390 {
6391 R700ControlFlowALUClause * pCF_ALU;
6392 pInst = plstCFmain->pHead;
6393 while(pInst)
6394 {
6395 if(SIT_CF_ALU == pInst->m_ShaderInstType)
6396 {
6397 pCF_ALU = (R700ControlFlowALUClause *)pInst;
6398 if(1 == pCF_ALU->m_Word1.f.count)
6399 {
6400 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
6401 }
6402 else
6403 {
6404 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
6405
6406 pALU->m_pLinkedALUClause = NULL;
6407 pALU = (R700ALUInstruction *)(pALU->pNextInst);
6408 pALU->m_pLinkedALUClause = pCF_ALU;
6409 pCF_ALU->m_pLinkedALUInstruction = pALU;
6410
6411 pCF_ALU->m_Word1.f.count--;
6412 }
6413 break;
6414 }
6415 pInst = pInst->pNextInst;
6416 };
6417 }
6418
6419 return GL_TRUE;
6420 }
6421
6422 GLboolean Process_Export(r700_AssemblerBase* pAsm,
6423 GLuint type,
6424 GLuint export_starting_index,
6425 GLuint export_count,
6426 GLuint starting_register_number,
6427 GLboolean is_depth_export)
6428 {
6429 unsigned char ucWriteMask;
6430
6431 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
6432 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
6433
6434 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
6435
6436 switch (type)
6437 {
6438 case SQ_EXPORT_PIXEL:
6439 if(GL_TRUE == is_depth_export)
6440 {
6441 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
6442 }
6443 else
6444 {
6445 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
6446 }
6447 break;
6448
6449 case SQ_EXPORT_POS:
6450 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
6451 break;
6452
6453 case SQ_EXPORT_PARAM:
6454 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
6455 break;
6456
6457 default:
6458 radeon_error("Unknown export type: %d\n", type);
6459 return GL_FALSE;
6460 break;
6461 }
6462
6463 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
6464
6465 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
6466 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
6467 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
6468
6469 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
6470 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
6471 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6472 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
6473 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6474 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
6475
6476 if (export_count == 1)
6477 {
6478 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
6479 /* exports Z as a float into Red channel */
6480 if (GL_TRUE == is_depth_export)
6481 ucWriteMask = 0x1;
6482
6483 if( (ucWriteMask & 0x1) != 0)
6484 {
6485 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6486 }
6487 else
6488 {
6489 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
6490 }
6491 if( ((ucWriteMask>>1) & 0x1) != 0)
6492 {
6493 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6494 }
6495 else
6496 {
6497 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
6498 }
6499 if( ((ucWriteMask>>2) & 0x1) != 0)
6500 {
6501 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6502 }
6503 else
6504 {
6505 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
6506 }
6507 if( ((ucWriteMask>>3) & 0x1) != 0)
6508 {
6509 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6510 }
6511 else
6512 {
6513 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
6514 }
6515 }
6516 else
6517 {
6518 // This should only be used if all components for all registers have been written
6519 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6520 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6521 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6522 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6523 }
6524
6525 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
6526
6527 return GL_TRUE;
6528 }
6529
6530 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
6531 {
6532 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
6533 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
6534
6535 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6536
6537 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6538
6539 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6540 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6541 pAsm->D.dst.reg = pAsm->depth_export_register_number;
6542
6543 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
6544
6545 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6546 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6547 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
6548
6549 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
6550
6551 noneg_PVSSRC(&(pAsm->S[0].src));
6552
6553 if( GL_FALSE == next_ins(pAsm) )
6554 {
6555 return GL_FALSE;
6556 }
6557
6558 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
6559
6560 return GL_TRUE;
6561 }
6562
6563 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
6564 GLbitfield OutputsWritten)
6565 {
6566 unsigned int unBit;
6567 GLuint export_count = 0;
6568
6569 if(pR700AsmCode->depth_export_register_number >= 0)
6570 {
6571 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
6572 {
6573 return GL_FALSE;
6574 }
6575 }
6576
6577 unBit = 1 << FRAG_RESULT_COLOR;
6578 if(OutputsWritten & unBit)
6579 {
6580 if( GL_FALSE == Process_Export(pR700AsmCode,
6581 SQ_EXPORT_PIXEL,
6582 0,
6583 1,
6584 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
6585 GL_FALSE) )
6586 {
6587 return GL_FALSE;
6588 }
6589 export_count++;
6590 }
6591 unBit = 1 << FRAG_RESULT_DEPTH;
6592 if(OutputsWritten & unBit)
6593 {
6594 if( GL_FALSE == Process_Export(pR700AsmCode,
6595 SQ_EXPORT_PIXEL,
6596 0,
6597 1,
6598 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
6599 GL_TRUE))
6600 {
6601 return GL_FALSE;
6602 }
6603 export_count++;
6604 }
6605 /* Need to export something, otherwise we'll hang
6606 * results are undefined anyway */
6607 if(export_count == 0)
6608 {
6609 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
6610 }
6611
6612 if(pR700AsmCode->cf_last_export_ptr != NULL)
6613 {
6614 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6615 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6616 }
6617
6618 return GL_TRUE;
6619 }
6620
6621 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6622 GLbitfield OutputsWritten)
6623 {
6624 unsigned int unBit;
6625 unsigned int i;
6626
6627 GLuint export_starting_index = 0;
6628 GLuint export_count = pR700AsmCode->number_of_exports;
6629
6630 unBit = 1 << VERT_RESULT_HPOS;
6631 if(OutputsWritten & unBit)
6632 {
6633 if( GL_FALSE == Process_Export(pR700AsmCode,
6634 SQ_EXPORT_POS,
6635 export_starting_index,
6636 1,
6637 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6638 GL_FALSE) )
6639 {
6640 return GL_FALSE;
6641 }
6642
6643 export_count--;
6644
6645 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6646 }
6647
6648 pR700AsmCode->number_of_exports = export_count;
6649
6650 unBit = 1 << VERT_RESULT_COL0;
6651 if(OutputsWritten & unBit)
6652 {
6653 if( GL_FALSE == Process_Export(pR700AsmCode,
6654 SQ_EXPORT_PARAM,
6655 export_starting_index,
6656 1,
6657 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6658 GL_FALSE) )
6659 {
6660 return GL_FALSE;
6661 }
6662
6663 export_starting_index++;
6664 }
6665
6666 unBit = 1 << VERT_RESULT_COL1;
6667 if(OutputsWritten & unBit)
6668 {
6669 if( GL_FALSE == Process_Export(pR700AsmCode,
6670 SQ_EXPORT_PARAM,
6671 export_starting_index,
6672 1,
6673 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6674 GL_FALSE) )
6675 {
6676 return GL_FALSE;
6677 }
6678
6679 export_starting_index++;
6680 }
6681
6682 unBit = 1 << VERT_RESULT_FOGC;
6683 if(OutputsWritten & unBit)
6684 {
6685 if( GL_FALSE == Process_Export(pR700AsmCode,
6686 SQ_EXPORT_PARAM,
6687 export_starting_index,
6688 1,
6689 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6690 GL_FALSE) )
6691 {
6692 return GL_FALSE;
6693 }
6694
6695 export_starting_index++;
6696 }
6697
6698 for(i=0; i<8; i++)
6699 {
6700 unBit = 1 << (VERT_RESULT_TEX0 + i);
6701 if(OutputsWritten & unBit)
6702 {
6703 if( GL_FALSE == Process_Export(pR700AsmCode,
6704 SQ_EXPORT_PARAM,
6705 export_starting_index,
6706 1,
6707 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6708 GL_FALSE) )
6709 {
6710 return GL_FALSE;
6711 }
6712
6713 export_starting_index++;
6714 }
6715 }
6716
6717 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6718 {
6719 unBit = 1 << i;
6720 if(OutputsWritten & unBit)
6721 {
6722 if( GL_FALSE == Process_Export(pR700AsmCode,
6723 SQ_EXPORT_PARAM,
6724 export_starting_index,
6725 1,
6726 pR700AsmCode->ucVP_OutputMap[i],
6727 GL_FALSE) )
6728 {
6729 return GL_FALSE;
6730 }
6731
6732 export_starting_index++;
6733 }
6734 }
6735
6736 // At least one param should be exported
6737 if (export_count)
6738 {
6739 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6740 }
6741 else
6742 {
6743 if( GL_FALSE == Process_Export(pR700AsmCode,
6744 SQ_EXPORT_PARAM,
6745 0,
6746 1,
6747 pR700AsmCode->starting_export_register_number,
6748 GL_FALSE) )
6749 {
6750 return GL_FALSE;
6751 }
6752
6753 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6754 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6755 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6756 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6757 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6758 }
6759
6760 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6761
6762 return GL_TRUE;
6763 }
6764
6765 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6766 {
6767 FREE(pR700AsmCode->pucOutMask);
6768 FREE(pR700AsmCode->pInstDeps);
6769
6770 if(NULL != pR700AsmCode->subs)
6771 {
6772 FREE(pR700AsmCode->subs);
6773 }
6774 if(NULL != pR700AsmCode->callers)
6775 {
6776 FREE(pR700AsmCode->callers);
6777 }
6778
6779 return GL_TRUE;
6780 }
6781