r600 : add support for shader instruction trunc and discard.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 #define USE_CF_FOR_CONTINUE_BREAK 1
42 #define USE_CF_FOR_POP_AFTER 1
43
44 BITS addrmode_PVSDST(PVSDST * pPVSDST)
45 {
46 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
47 }
48
49 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
50 {
51 pPVSDST->addrmode0 = addrmode & 1;
52 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
53 }
54
55 void nomask_PVSDST(PVSDST * pPVSDST)
56 {
57 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
58 }
59
60 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
61 {
62 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
63 }
64
65 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
66 {
67 pPVSSRC->addrmode0 = addrmode & 1;
68 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
69 }
70
71
72 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
73 {
74 pPVSSRC->swizzlex =
75 pPVSSRC->swizzley =
76 pPVSSRC->swizzlez =
77 pPVSSRC->swizzlew = swz;
78 }
79
80 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
81 {
82 pPVSSRC->swizzlex = SQ_SEL_X;
83 pPVSSRC->swizzley = SQ_SEL_Y;
84 pPVSSRC->swizzlez = SQ_SEL_Z;
85 pPVSSRC->swizzlew = SQ_SEL_W;
86 }
87
88 void
89 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
90 {
91 switch (x)
92 {
93 case SQ_SEL_X: x = pPVSSRC->swizzlex;
94 break;
95 case SQ_SEL_Y: x = pPVSSRC->swizzley;
96 break;
97 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
98 break;
99 case SQ_SEL_W: x = pPVSSRC->swizzlew;
100 break;
101 default:;
102 }
103
104 switch (y)
105 {
106 case SQ_SEL_X: y = pPVSSRC->swizzlex;
107 break;
108 case SQ_SEL_Y: y = pPVSSRC->swizzley;
109 break;
110 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
111 break;
112 case SQ_SEL_W: y = pPVSSRC->swizzlew;
113 break;
114 default:;
115 }
116
117 switch (z)
118 {
119 case SQ_SEL_X: z = pPVSSRC->swizzlex;
120 break;
121 case SQ_SEL_Y: z = pPVSSRC->swizzley;
122 break;
123 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
124 break;
125 case SQ_SEL_W: z = pPVSSRC->swizzlew;
126 break;
127 default:;
128 }
129
130 switch (w)
131 {
132 case SQ_SEL_X: w = pPVSSRC->swizzlex;
133 break;
134 case SQ_SEL_Y: w = pPVSSRC->swizzley;
135 break;
136 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
137 break;
138 case SQ_SEL_W: w = pPVSSRC->swizzlew;
139 break;
140 default:;
141 }
142
143 pPVSSRC->swizzlex = x;
144 pPVSSRC->swizzley = y;
145 pPVSSRC->swizzlez = z;
146 pPVSSRC->swizzlew = w;
147 }
148
149 void neg_PVSSRC(PVSSRC* pPVSSRC)
150 {
151 pPVSSRC->negx = 1;
152 pPVSSRC->negy = 1;
153 pPVSSRC->negz = 1;
154 pPVSSRC->negw = 1;
155 }
156
157 void noneg_PVSSRC(PVSSRC* pPVSSRC)
158 {
159 pPVSSRC->negx = 0;
160 pPVSSRC->negy = 0;
161 pPVSSRC->negz = 0;
162 pPVSSRC->negw = 0;
163 }
164
165 // negate argument (for SUB instead of ADD and alike)
166 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
167 {
168 pPVSSRC->negx = !pPVSSRC->negx;
169 pPVSSRC->negy = !pPVSSRC->negy;
170 pPVSSRC->negz = !pPVSSRC->negz;
171 pPVSSRC->negw = !pPVSSRC->negw;
172 }
173
174 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
175 {
176 switch (c)
177 {
178 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
179 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
180 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
181 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
182 default:;
183 }
184 }
185
186 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
187 {
188 switch (c)
189 {
190 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
191 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
192 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
193 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
194 default:;
195 }
196 }
197
198 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
199 {
200 return (pOutVTXFmt0->point_size |
201 pOutVTXFmt0->edge_flag |
202 pOutVTXFmt0->rta_index |
203 pOutVTXFmt0->kill_flag |
204 pOutVTXFmt0->viewport_index);
205 }
206
207 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
208 {
209 return (pFPOutFmt->depth |
210 pFPOutFmt->stencil_ref |
211 pFPOutFmt->mask |
212 pFPOutFmt->coverage_to_mask);
213 }
214
215 GLboolean is_reduction_opcode(PVSDWORD* dest)
216 {
217 if (dest->dst.op3 == 0)
218 {
219 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
220 {
221 return GL_TRUE;
222 }
223 }
224 return GL_FALSE;
225 }
226
227 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
228 {
229 GLuint format = FMT_INVALID;
230 GLuint uiElemSize = 0;
231
232 switch (eType)
233 {
234 case GL_BYTE:
235 case GL_UNSIGNED_BYTE:
236 uiElemSize = 1;
237 switch(nChannels)
238 {
239 case 1:
240 format = FMT_8; break;
241 case 2:
242 format = FMT_8_8; break;
243 case 3:
244 format = FMT_8_8_8; break;
245 case 4:
246 format = FMT_8_8_8_8; break;
247 default:
248 break;
249 }
250 break;
251
252 case GL_UNSIGNED_SHORT:
253 case GL_SHORT:
254 uiElemSize = 2;
255 switch(nChannels)
256 {
257 case 1:
258 format = FMT_16; break;
259 case 2:
260 format = FMT_16_16; break;
261 case 3:
262 format = FMT_16_16_16; break;
263 case 4:
264 format = FMT_16_16_16_16; break;
265 default:
266 break;
267 }
268 break;
269
270 case GL_UNSIGNED_INT:
271 case GL_INT:
272 uiElemSize = 4;
273 switch(nChannels)
274 {
275 case 1:
276 format = FMT_32; break;
277 case 2:
278 format = FMT_32_32; break;
279 case 3:
280 format = FMT_32_32_32; break;
281 case 4:
282 format = FMT_32_32_32_32; break;
283 default:
284 break;
285 }
286 break;
287
288 case GL_FLOAT:
289 uiElemSize = 4;
290 switch(nChannels)
291 {
292 case 1:
293 format = FMT_32_FLOAT; break;
294 case 2:
295 format = FMT_32_32_FLOAT; break;
296 case 3:
297 format = FMT_32_32_32_FLOAT; break;
298 case 4:
299 format = FMT_32_32_32_32_FLOAT; break;
300 default:
301 break;
302 }
303 break;
304 case GL_DOUBLE:
305 uiElemSize = 8;
306 switch(nChannels)
307 {
308 case 1:
309 format = FMT_32_FLOAT; break;
310 case 2:
311 format = FMT_32_32_FLOAT; break;
312 case 3:
313 format = FMT_32_32_32_FLOAT; break;
314 case 4:
315 format = FMT_32_32_32_32_FLOAT; break;
316 default:
317 break;
318 }
319 break;
320 default:
321 ;
322 //GL_ASSERT_NO_CASE();
323 }
324
325 if(NULL != pClient_size)
326 {
327 *pClient_size = uiElemSize * nChannels;
328 }
329
330 return(format);
331 }
332
333 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
334 {
335 if(pAsm->D.dst.op3)
336 {
337 return 3;
338 }
339
340 switch (pAsm->D.dst.opcode)
341 {
342 case SQ_OP2_INST_ADD:
343 case SQ_OP2_INST_KILLE:
344 case SQ_OP2_INST_KILLGT:
345 case SQ_OP2_INST_KILLGE:
346 case SQ_OP2_INST_KILLNE:
347 case SQ_OP2_INST_MUL:
348 case SQ_OP2_INST_MAX:
349 case SQ_OP2_INST_MIN:
350 //case SQ_OP2_INST_MAX_DX10:
351 //case SQ_OP2_INST_MIN_DX10:
352 case SQ_OP2_INST_SETE:
353 case SQ_OP2_INST_SETNE:
354 case SQ_OP2_INST_SETGT:
355 case SQ_OP2_INST_SETGE:
356 case SQ_OP2_INST_PRED_SETE:
357 case SQ_OP2_INST_PRED_SETGT:
358 case SQ_OP2_INST_PRED_SETGE:
359 case SQ_OP2_INST_PRED_SETNE:
360 case SQ_OP2_INST_DOT4:
361 case SQ_OP2_INST_DOT4_IEEE:
362 case SQ_OP2_INST_CUBE:
363 return 2;
364
365 case SQ_OP2_INST_MOV:
366 case SQ_OP2_INST_MOVA_FLOOR:
367 case SQ_OP2_INST_FRACT:
368 case SQ_OP2_INST_FLOOR:
369 case SQ_OP2_INST_TRUNC:
370 case SQ_OP2_INST_EXP_IEEE:
371 case SQ_OP2_INST_LOG_CLAMPED:
372 case SQ_OP2_INST_LOG_IEEE:
373 case SQ_OP2_INST_RECIP_IEEE:
374 case SQ_OP2_INST_RECIPSQRT_IEEE:
375 case SQ_OP2_INST_FLT_TO_INT:
376 case SQ_OP2_INST_SIN:
377 case SQ_OP2_INST_COS:
378 return 1;
379
380 default: radeon_error(
381 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
382 };
383
384 return 3;
385 }
386
387 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
388 {
389 GLuint i;
390
391 Init_R700_Shader(pShader);
392 pAsm->pR700Shader = pShader;
393 pAsm->currentShaderType = spt;
394
395 pAsm->cf_last_export_ptr = NULL;
396
397 pAsm->cf_current_export_clause_ptr = NULL;
398 pAsm->cf_current_alu_clause_ptr = NULL;
399 pAsm->cf_current_tex_clause_ptr = NULL;
400 pAsm->cf_current_vtx_clause_ptr = NULL;
401 pAsm->cf_current_cf_clause_ptr = NULL;
402
403 // No clause has been created yet
404 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
405
406 pAsm->number_of_colorandz_exports = 0;
407 pAsm->number_of_exports = 0;
408 pAsm->number_of_export_opcodes = 0;
409
410 pAsm->alu_x_opcode = 0;
411
412 pAsm->D2.bits = 0;
413
414 pAsm->D.bits = 0;
415 pAsm->S[0].bits = 0;
416 pAsm->S[1].bits = 0;
417 pAsm->S[2].bits = 0;
418
419 pAsm->uLastPosUpdate = 0;
420
421 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
422
423 pAsm->uIIns = 0;
424 pAsm->uOIns = 0;
425 pAsm->number_used_registers = 0;
426 pAsm->uUsedConsts = 256;
427
428
429 // Fragment programs
430 pAsm->uBoolConsts = 0;
431 pAsm->uIntConsts = 0;
432 pAsm->uInsts = 0;
433 pAsm->uConsts = 0;
434
435 pAsm->FCSP = 0;
436 pAsm->fc_stack[0].type = FC_NONE;
437
438 pAsm->aArgSubst[0] =
439 pAsm->aArgSubst[1] =
440 pAsm->aArgSubst[2] =
441 pAsm->aArgSubst[3] = (-1);
442
443 pAsm->uOutputs = 0;
444
445 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
446 {
447 pAsm->color_export_register_number[i] = (-1);
448 }
449
450
451 pAsm->depth_export_register_number = (-1);
452 pAsm->stencil_export_register_number = (-1);
453 pAsm->coverage_to_mask_export_register_number = (-1);
454 pAsm->mask_export_register_number = (-1);
455
456 pAsm->starting_export_register_number = 0;
457 pAsm->starting_vfetch_register_number = 0;
458 pAsm->starting_temp_register_number = 0;
459 pAsm->uFirstHelpReg = 0;
460
461 pAsm->input_position_is_used = GL_FALSE;
462 pAsm->input_normal_is_used = GL_FALSE;
463
464 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
465 {
466 pAsm->input_color_is_used[ i ] = GL_FALSE;
467 }
468
469 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
470 {
471 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
472 }
473
474 for (i=0; i<VERT_ATTRIB_MAX; i++)
475 {
476 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
477 }
478
479 pAsm->number_of_inputs = 0;
480
481 pAsm->is_tex = GL_FALSE;
482 pAsm->need_tex_barrier = GL_FALSE;
483
484 pAsm->subs = NULL;
485 pAsm->unSubArraySize = 0;
486 pAsm->unSubArrayPointer = 0;
487 pAsm->callers = NULL;
488 pAsm->unCallerArraySize = 0;
489 pAsm->unCallerArrayPointer = 0;
490
491 pAsm->CALLSP = 0;
492 pAsm->CALLSTACK[0].FCSP_BeforeEntry = 0;
493 pAsm->CALLSTACK[0].plstCFInstructions_local
494 = &(pAsm->pR700Shader->lstCFInstructions);
495
496 pAsm->CALLSTACK[0].max = 0;
497 pAsm->CALLSTACK[0].current = 0;
498
499 SetActiveCFlist(pAsm->pR700Shader, pAsm->CALLSTACK[0].plstCFInstructions_local);
500
501 pAsm->unCFflags = 0;
502
503 return 0;
504 }
505
506 GLboolean IsTex(gl_inst_opcode Opcode)
507 {
508 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
509 {
510 return GL_TRUE;
511 }
512 return GL_FALSE;
513 }
514
515 GLboolean IsAlu(gl_inst_opcode Opcode)
516 {
517 //TODO : more for fc and ex for higher spec.
518 if( IsTex(Opcode) )
519 {
520 return GL_FALSE;
521 }
522 return GL_TRUE;
523 }
524
525 int check_current_clause(r700_AssemblerBase* pAsm,
526 CF_CLAUSE_TYPE new_clause_type)
527 {
528 if (pAsm->cf_current_clause_type != new_clause_type)
529 { //Close last open clause
530 switch (pAsm->cf_current_clause_type)
531 {
532 case CF_ALU_CLAUSE:
533 if ( pAsm->cf_current_alu_clause_ptr != NULL)
534 {
535 pAsm->cf_current_alu_clause_ptr = NULL;
536 }
537 break;
538 case CF_VTX_CLAUSE:
539 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
540 {
541 pAsm->cf_current_vtx_clause_ptr = NULL;
542 }
543 break;
544 case CF_TEX_CLAUSE:
545 if ( pAsm->cf_current_tex_clause_ptr != NULL)
546 {
547 pAsm->cf_current_tex_clause_ptr = NULL;
548 }
549 break;
550 case CF_EXPORT_CLAUSE:
551 if ( pAsm->cf_current_export_clause_ptr != NULL)
552 {
553 pAsm->cf_current_export_clause_ptr = NULL;
554 }
555 break;
556 case CF_OTHER_CLAUSE:
557 if ( pAsm->cf_current_cf_clause_ptr != NULL)
558 {
559 pAsm->cf_current_cf_clause_ptr = NULL;
560 }
561 break;
562 case CF_EMPTY_CLAUSE:
563 break;
564 default:
565 radeon_error(
566 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
567 return GL_FALSE;
568 }
569
570 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
571
572 // Create new clause
573 switch (new_clause_type)
574 {
575 case CF_ALU_CLAUSE:
576 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
577 break;
578 case CF_VTX_CLAUSE:
579 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
580 break;
581 case CF_TEX_CLAUSE:
582 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
583 break;
584 case CF_EXPORT_CLAUSE:
585 {
586 R700ControlFlowSXClause* pR700ControlFlowSXClause
587 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
588
589 // Add new export instruction to control flow program
590 if (pR700ControlFlowSXClause != 0)
591 {
592 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
593 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
594 AddCFInstruction( pAsm->pR700Shader,
595 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
596 }
597 else
598 {
599 radeon_error(
600 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
601 return GL_FALSE;
602 }
603 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
604 }
605 break;
606 case CF_EMPTY_CLAUSE:
607 break;
608 case CF_OTHER_CLAUSE:
609 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
610 break;
611 default:
612 radeon_error(
613 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
614 return GL_FALSE;
615 }
616 }
617
618 return GL_TRUE;
619 }
620
621 GLboolean add_cf_instruction(r700_AssemblerBase* pAsm)
622 {
623 if(GL_FALSE == check_current_clause(pAsm, CF_OTHER_CLAUSE))
624 {
625 return GL_FALSE;
626 }
627
628 pAsm->cf_current_cf_clause_ptr =
629 (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
630
631 if (pAsm->cf_current_cf_clause_ptr != NULL)
632 {
633 Init_R700ControlFlowGenericClause(pAsm->cf_current_cf_clause_ptr);
634 AddCFInstruction( pAsm->pR700Shader,
635 (R700ControlFlowInstruction *)pAsm->cf_current_cf_clause_ptr );
636 }
637 else
638 {
639 radeon_error("Could not allocate a new VFetch CF instruction.\n");
640 return GL_FALSE;
641 }
642
643 return GL_TRUE;
644 }
645
646 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
647 R700VertexInstruction* vertex_instruction_ptr)
648 {
649 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
650 {
651 return GL_FALSE;
652 }
653
654 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
655 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
656 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
657 ) )
658 {
659 // Create new Vfetch control flow instruction for this new clause
660 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
661
662 if (pAsm->cf_current_vtx_clause_ptr != NULL)
663 {
664 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
665 AddCFInstruction( pAsm->pR700Shader,
666 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
667 }
668 else
669 {
670 radeon_error("Could not allocate a new VFetch CF instruction.\n");
671 return GL_FALSE;
672 }
673
674 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
675 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
676 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
677 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
678 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
679 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
680 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
681 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
682 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
683
684 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
685 }
686 else
687 {
688 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
689 }
690
691 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
692
693 return GL_TRUE;
694 }
695
696 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
697 R700TextureInstruction* tex_instruction_ptr)
698 {
699 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
700 {
701 return GL_FALSE;
702 }
703
704 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
705 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
706 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
707 ) )
708 {
709 // new tex cf instruction for this new clause
710 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
711
712 if (pAsm->cf_current_tex_clause_ptr != NULL)
713 {
714 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
715 AddCFInstruction( pAsm->pR700Shader,
716 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
717 }
718 else
719 {
720 radeon_error("Could not allocate a new TEX CF instruction.\n");
721 return GL_FALSE;
722 }
723
724 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
725 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
726 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
727
728 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
729 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
730 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
731 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
732 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
733 }
734 else
735 {
736 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
737 }
738
739 // If this clause constains any TEX instruction that is dependent on a previous instruction,
740 // set the barrier bit
741 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
742 {
743 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
744 }
745
746 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
747 {
748 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
749 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
750 }
751
752 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
753
754 return GL_TRUE;
755 }
756
757 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
758 GLuint gl_client_id,
759 GLuint destination_register,
760 GLuint number_of_elements,
761 GLenum dataElementType,
762 VTX_FETCH_METHOD* pFetchMethod)
763 {
764 GLuint client_size_inbyte;
765 GLuint data_format;
766 GLuint mega_fetch_count;
767 GLuint is_mega_fetch_flag;
768
769 R700VertexGenericFetch* vfetch_instruction_ptr;
770 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
771
772 if (assembled_vfetch_instruction_ptr == NULL)
773 {
774 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
775 if (vfetch_instruction_ptr == NULL)
776 {
777 return GL_FALSE;
778 }
779 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
780 }
781 else
782 {
783 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
784 }
785
786 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
787
788 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
789 {
790 //TODO : mini fetch
791 }
792 else
793 {
794 mega_fetch_count = MEGA_FETCH_BYTES - 1;
795 is_mega_fetch_flag = 0x1;
796 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
797 }
798
799 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
800 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
801 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
802
803 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
804 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
805 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
806 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
807 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
808
809 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
810 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
811 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
812 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
813
814 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
815
816 // Destination register
817 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
818 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
819
820 vfetch_instruction_ptr->m_Word2.f.offset = 0;
821 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
822
823 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
824
825 if (assembled_vfetch_instruction_ptr == NULL)
826 {
827 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
828 {
829 return GL_FALSE;
830 }
831
832 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
833 {
834 return GL_FALSE;
835 }
836 else
837 {
838 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
839 }
840 }
841
842 return GL_TRUE;
843 }
844
845 GLboolean assemble_vfetch_instruction2(r700_AssemblerBase* pAsm,
846 GLuint destination_register,
847 GLenum type,
848 GLint size,
849 GLubyte element,
850 GLuint _signed,
851 GLboolean normalize,
852 VTX_FETCH_METHOD * pFetchMethod)
853 {
854 GLuint client_size_inbyte;
855 GLuint data_format;
856 GLuint mega_fetch_count;
857 GLuint is_mega_fetch_flag;
858
859 R700VertexGenericFetch* vfetch_instruction_ptr;
860 R700VertexGenericFetch* assembled_vfetch_instruction_ptr
861 = pAsm->vfetch_instruction_ptr_array[element];
862
863 if (assembled_vfetch_instruction_ptr == NULL)
864 {
865 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
866 if (vfetch_instruction_ptr == NULL)
867 {
868 return GL_FALSE;
869 }
870 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
871 }
872 else
873 {
874 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
875 }
876
877 data_format = GetSurfaceFormat(type, size, &client_size_inbyte);
878
879 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
880 {
881 //TODO : mini fetch
882 }
883 else
884 {
885 mega_fetch_count = MEGA_FETCH_BYTES - 1;
886 is_mega_fetch_flag = 0x1;
887 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
888 }
889
890 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
891 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
892 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
893
894 vfetch_instruction_ptr->m_Word0.f.buffer_id = element;
895 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
896 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
897 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
898 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
899
900 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (size < 1) ? SQ_SEL_0 : SQ_SEL_X;
901 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (size < 2) ? SQ_SEL_0 : SQ_SEL_Y;
902 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (size < 3) ? SQ_SEL_0 : SQ_SEL_Z;
903 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (size < 4) ? SQ_SEL_1 : SQ_SEL_W;
904
905 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
906 vfetch_instruction_ptr->m_Word1.f.data_format = data_format;
907 vfetch_instruction_ptr->m_Word2.f.endian_swap = SQ_ENDIAN_NONE;
908
909 if(1 == _signed)
910 {
911 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_SIGNED;
912 }
913 else
914 {
915 vfetch_instruction_ptr->m_Word1.f.format_comp_all = SQ_FORMAT_COMP_UNSIGNED;
916 }
917
918 if(GL_TRUE == normalize)
919 {
920 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_NORM;
921 }
922 else
923 {
924 vfetch_instruction_ptr->m_Word1.f.num_format_all = SQ_NUM_FORMAT_INT;
925 }
926
927 // Destination register
928 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
929 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
930
931 vfetch_instruction_ptr->m_Word2.f.offset = 0;
932 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
933
934 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
935
936 if (assembled_vfetch_instruction_ptr == NULL)
937 {
938 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
939 {
940 return GL_FALSE;
941 }
942
943 if (pAsm->vfetch_instruction_ptr_array[element] != NULL)
944 {
945 return GL_FALSE;
946 }
947 else
948 {
949 pAsm->vfetch_instruction_ptr_array[element] = vfetch_instruction_ptr;
950 }
951 }
952
953 return GL_TRUE;
954 }
955
956 GLboolean cleanup_vfetch_instructions(r700_AssemblerBase* pAsm)
957 {
958 GLint i;
959 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
960 pAsm->cf_current_vtx_clause_ptr = NULL;
961
962 for (i=0; i<VERT_ATTRIB_MAX; i++)
963 {
964 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
965 }
966
967 cleanup_vfetch_shaderinst(pAsm->pR700Shader);
968
969 return GL_TRUE;
970 }
971
972 GLuint gethelpr(r700_AssemblerBase* pAsm)
973 {
974 GLuint r = pAsm->uHelpReg;
975 pAsm->uHelpReg++;
976 if (pAsm->uHelpReg > pAsm->number_used_registers)
977 {
978 pAsm->number_used_registers = pAsm->uHelpReg;
979 }
980 return r;
981 }
982 void resethelpr(r700_AssemblerBase* pAsm)
983 {
984 pAsm->uHelpReg = pAsm->uFirstHelpReg;
985 }
986
987 void checkop_init(r700_AssemblerBase* pAsm)
988 {
989 resethelpr(pAsm);
990 pAsm->aArgSubst[0] =
991 pAsm->aArgSubst[1] =
992 pAsm->aArgSubst[2] =
993 pAsm->aArgSubst[3] = -1;
994 }
995
996 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
997 {
998 GLuint tmp = gethelpr(pAsm);
999
1000 //mov src to temp helper gpr.
1001 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
1002
1003 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1004
1005 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1006 pAsm->D.dst.reg = tmp;
1007
1008 nomask_PVSDST(&(pAsm->D.dst));
1009
1010 if( GL_FALSE == assemble_src(pAsm, src, 0) )
1011 {
1012 return GL_FALSE;
1013 }
1014
1015 noswizzle_PVSSRC(&(pAsm->S[0].src));
1016 noneg_PVSSRC(&(pAsm->S[0].src));
1017
1018 if( GL_FALSE == next_ins(pAsm) )
1019 {
1020 return GL_FALSE;
1021 }
1022
1023 pAsm->aArgSubst[1 + src] = tmp;
1024
1025 return GL_TRUE;
1026 }
1027
1028 GLboolean checkop1(r700_AssemblerBase* pAsm)
1029 {
1030 checkop_init(pAsm);
1031 return GL_TRUE;
1032 }
1033
1034 GLboolean checkop2(r700_AssemblerBase* pAsm)
1035 {
1036 GLboolean bSrcConst[2];
1037 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1038
1039 checkop_init(pAsm);
1040
1041 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1042 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1043 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1044 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1045 {
1046 bSrcConst[0] = GL_TRUE;
1047 }
1048 else
1049 {
1050 bSrcConst[0] = GL_FALSE;
1051 }
1052 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1053 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1054 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1055 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1056 {
1057 bSrcConst[1] = GL_TRUE;
1058 }
1059 else
1060 {
1061 bSrcConst[1] = GL_FALSE;
1062 }
1063
1064 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
1065 {
1066 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1067 {
1068 if( GL_FALSE == mov_temp(pAsm, 1) )
1069 {
1070 return GL_FALSE;
1071 }
1072 }
1073 }
1074
1075 return GL_TRUE;
1076 }
1077
1078 GLboolean checkop3(r700_AssemblerBase* pAsm)
1079 {
1080 GLboolean bSrcConst[3];
1081 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1082
1083 checkop_init(pAsm);
1084
1085 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
1086 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
1087 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
1088 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
1089 {
1090 bSrcConst[0] = GL_TRUE;
1091 }
1092 else
1093 {
1094 bSrcConst[0] = GL_FALSE;
1095 }
1096 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
1097 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
1098 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
1099 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
1100 {
1101 bSrcConst[1] = GL_TRUE;
1102 }
1103 else
1104 {
1105 bSrcConst[1] = GL_FALSE;
1106 }
1107 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
1108 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
1109 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
1110 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
1111 {
1112 bSrcConst[2] = GL_TRUE;
1113 }
1114 else
1115 {
1116 bSrcConst[2] = GL_FALSE;
1117 }
1118
1119 if( (GL_TRUE == bSrcConst[0]) &&
1120 (GL_TRUE == bSrcConst[1]) &&
1121 (GL_TRUE == bSrcConst[2]) )
1122 {
1123 if( GL_FALSE == mov_temp(pAsm, 1) )
1124 {
1125 return GL_FALSE;
1126 }
1127 if( GL_FALSE == mov_temp(pAsm, 2) )
1128 {
1129 return GL_FALSE;
1130 }
1131
1132 return GL_TRUE;
1133 }
1134 else if( (GL_TRUE == bSrcConst[0]) &&
1135 (GL_TRUE == bSrcConst[1]) )
1136 {
1137 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
1138 {
1139 if( GL_FALSE == mov_temp(pAsm, 1) )
1140 {
1141 return 1;
1142 }
1143 }
1144
1145 return GL_TRUE;
1146 }
1147 else if ( (GL_TRUE == bSrcConst[0]) &&
1148 (GL_TRUE == bSrcConst[2]) )
1149 {
1150 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
1151 {
1152 if( GL_FALSE == mov_temp(pAsm, 2) )
1153 {
1154 return GL_FALSE;
1155 }
1156 }
1157
1158 return GL_TRUE;
1159 }
1160 else if( (GL_TRUE == bSrcConst[1]) &&
1161 (GL_TRUE == bSrcConst[2]) )
1162 {
1163 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
1164 {
1165 if( GL_FALSE == mov_temp(pAsm, 2) )
1166 {
1167 return GL_FALSE;
1168 }
1169 }
1170
1171 return GL_TRUE;
1172 }
1173
1174 return GL_TRUE;
1175 }
1176
1177 GLboolean assemble_src(r700_AssemblerBase *pAsm,
1178 int src,
1179 int fld)
1180 {
1181 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1182
1183 if (fld == -1)
1184 {
1185 fld = src;
1186 }
1187
1188 if(pAsm->aArgSubst[1+src] >= 0)
1189 {
1190 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1191 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1192 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1193 }
1194 else
1195 {
1196 switch (pILInst->SrcReg[src].File)
1197 {
1198 case PROGRAM_TEMPORARY:
1199 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1200 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1201 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1202 break;
1203 case PROGRAM_CONSTANT:
1204 case PROGRAM_LOCAL_PARAM:
1205 case PROGRAM_ENV_PARAM:
1206 case PROGRAM_STATE_VAR:
1207 case PROGRAM_UNIFORM:
1208 if (1 == pILInst->SrcReg[src].RelAddr)
1209 {
1210 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1211 }
1212 else
1213 {
1214 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1215 }
1216
1217 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1218 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1219 break;
1220 case PROGRAM_INPUT:
1221 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1222 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1223 switch (pAsm->currentShaderType)
1224 {
1225 case SPT_FP:
1226 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1227 break;
1228 case SPT_VP:
1229 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1230 break;
1231 }
1232 break;
1233 default:
1234 radeon_error("Invalid source argument type : %d \n", pILInst->SrcReg[src].File);
1235 return GL_FALSE;
1236 }
1237 }
1238
1239 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1240 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1241 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1242 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1243
1244 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1245 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1246 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1247 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1248
1249 return GL_TRUE;
1250 }
1251
1252 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1253 {
1254 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1255 switch (pILInst->DstReg.File)
1256 {
1257 case PROGRAM_TEMPORARY:
1258 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1259 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1260 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1261 break;
1262 case PROGRAM_ADDRESS:
1263 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1264 pAsm->D.dst.rtype = DST_REG_A0;
1265 pAsm->D.dst.reg = 0;
1266 break;
1267 case PROGRAM_OUTPUT:
1268 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1269 pAsm->D.dst.rtype = DST_REG_OUT;
1270 switch (pAsm->currentShaderType)
1271 {
1272 case SPT_FP:
1273 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1274 break;
1275 case SPT_VP:
1276 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1277 break;
1278 }
1279 break;
1280 default:
1281 radeon_error("Invalid destination output argument type\n");
1282 return GL_FALSE;
1283 }
1284
1285 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1286 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1287 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1288 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1289
1290 return GL_TRUE;
1291 }
1292
1293 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1294 {
1295 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1296
1297 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1298 {
1299 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1300 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1301
1302 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1303 }
1304 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1305 {
1306 pAsm->D.dst.rtype = DST_REG_OUT;
1307 switch (pAsm->currentShaderType)
1308 {
1309 case SPT_FP:
1310 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1311 break;
1312 case SPT_VP:
1313 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1314 break;
1315 }
1316
1317 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1318 }
1319 else
1320 {
1321 radeon_error("Invalid destination output argument type\n");
1322 return GL_FALSE;
1323 }
1324
1325 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1326 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1327 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1328 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1329
1330 return GL_TRUE;
1331 }
1332
1333 GLboolean tex_src(r700_AssemblerBase *pAsm)
1334 {
1335 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1336
1337 GLboolean bValidTexCoord = GL_FALSE;
1338
1339 if(pAsm->aArgSubst[1] >= 0)
1340 {
1341 bValidTexCoord = GL_TRUE;
1342 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1343 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1344 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1345 }
1346 else
1347 {
1348 switch (pILInst->SrcReg[0].File) {
1349 case PROGRAM_CONSTANT:
1350 case PROGRAM_LOCAL_PARAM:
1351 case PROGRAM_ENV_PARAM:
1352 case PROGRAM_STATE_VAR:
1353 break;
1354 case PROGRAM_TEMPORARY:
1355 bValidTexCoord = GL_TRUE;
1356 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1357 pAsm->starting_temp_register_number;
1358 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1359 break;
1360 case PROGRAM_INPUT:
1361 switch (pILInst->SrcReg[0].Index)
1362 {
1363 case FRAG_ATTRIB_WPOS:
1364 case FRAG_ATTRIB_COL0:
1365 case FRAG_ATTRIB_COL1:
1366 case FRAG_ATTRIB_FOGC:
1367 case FRAG_ATTRIB_TEX0:
1368 case FRAG_ATTRIB_TEX1:
1369 case FRAG_ATTRIB_TEX2:
1370 case FRAG_ATTRIB_TEX3:
1371 case FRAG_ATTRIB_TEX4:
1372 case FRAG_ATTRIB_TEX5:
1373 case FRAG_ATTRIB_TEX6:
1374 case FRAG_ATTRIB_TEX7:
1375 bValidTexCoord = GL_TRUE;
1376 pAsm->S[0].src.reg =
1377 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1378 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1379 break;
1380 case FRAG_ATTRIB_FACE:
1381 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1382 break;
1383 case FRAG_ATTRIB_PNTC:
1384 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1385 break;
1386 }
1387
1388 if( (pILInst->SrcReg[0].Index >= FRAG_ATTRIB_VAR0) ||
1389 (pILInst->SrcReg[0].Index < FRAG_ATTRIB_MAX) )
1390 {
1391 bValidTexCoord = GL_TRUE;
1392 pAsm->S[0].src.reg =
1393 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1394 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1395 }
1396
1397 break;
1398 }
1399 }
1400
1401 if(GL_TRUE == bValidTexCoord)
1402 {
1403 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1404 }
1405 else
1406 {
1407 radeon_error("Invalid source texcoord for TEX instruction\n");
1408 return GL_FALSE;
1409 }
1410
1411 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1412 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1413 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1414 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1415
1416 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1417 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1418 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1419 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1420
1421 return GL_TRUE;
1422 }
1423
1424 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1425 {
1426 PVSSRC * texture_coordinate_source;
1427 PVSSRC * texture_unit_source;
1428
1429 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1430 if (tex_instruction_ptr == NULL)
1431 {
1432 return GL_FALSE;
1433 }
1434 Init_R700TextureInstruction(tex_instruction_ptr);
1435
1436 texture_coordinate_source = &(pAsm->S[0].src);
1437 texture_unit_source = &(pAsm->S[1].src);
1438
1439 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1440 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1441 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1442
1443 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1444
1445 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1446 if (normalized) {
1447 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1448 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1449 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1450 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1451 } else {
1452 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1453 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1454 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1455 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1456 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1457 }
1458
1459 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1460 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1461 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1462
1463 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1464
1465 // dst
1466 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1467 (pAsm->D.dst.rtype == DST_REG_OUT) )
1468 {
1469 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1470 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1471
1472 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1473 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1474
1475 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1476 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1477 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1478 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1479
1480
1481 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1482 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1483 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1484 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1485 }
1486 else
1487 {
1488 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1489 return GL_FALSE;
1490 }
1491
1492 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1493 {
1494 return GL_FALSE;
1495 }
1496
1497 return GL_TRUE;
1498 }
1499
1500 void initialize(r700_AssemblerBase *pAsm)
1501 {
1502 GLuint cycle, component;
1503
1504 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1505 {
1506 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1507 {
1508 pAsm->hw_gpr[cycle][component] = (-1);
1509 }
1510 }
1511 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1512 {
1513 pAsm->hw_cfile_addr[component] = (-1);
1514 pAsm->hw_cfile_chan[component] = (-1);
1515 }
1516 }
1517
1518 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1519 int source_index,
1520 PVSSRC* pSource,
1521 BITS scalar_channel_index)
1522 {
1523 BITS src_sel;
1524 BITS src_rel;
1525 BITS src_chan;
1526 BITS src_neg;
1527
1528 //--------------------------------------------------------------------------
1529 // Source for operands src0, src1.
1530 // Values [0,127] correspond to GPR[0..127].
1531 // Values [256,511] correspond to cfile constants c[0..255].
1532
1533 //--------------------------------------------------------------------------
1534 // Other special values are shown in the list below.
1535
1536 // 248 SQ_ALU_SRC_0: special constant 0.0.
1537 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1538
1539 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1540 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1541
1542 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1543 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1544
1545 // 254 SQ_ALU_SRC_PV: previous vector result.
1546 // 255 SQ_ALU_SRC_PS: previous scalar result.
1547 //--------------------------------------------------------------------------
1548
1549 BITS channel_swizzle;
1550 switch (scalar_channel_index)
1551 {
1552 case 0: channel_swizzle = pSource->swizzlex; break;
1553 case 1: channel_swizzle = pSource->swizzley; break;
1554 case 2: channel_swizzle = pSource->swizzlez; break;
1555 case 3: channel_swizzle = pSource->swizzlew; break;
1556 default: channel_swizzle = SQ_SEL_MASK; break;
1557 }
1558
1559 if(channel_swizzle == SQ_SEL_0)
1560 {
1561 src_sel = SQ_ALU_SRC_0;
1562 }
1563 else if (channel_swizzle == SQ_SEL_1)
1564 {
1565 src_sel = SQ_ALU_SRC_1;
1566 }
1567 else
1568 {
1569 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1570 (pSource->rtype == SRC_REG_INPUT)
1571 )
1572 {
1573 src_sel = pSource->reg;
1574 }
1575 else if (pSource->rtype == SRC_REG_CONSTANT)
1576 {
1577 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1578 }
1579 else if (pSource->rtype == SRC_REC_LITERAL)
1580 {
1581 src_sel = SQ_ALU_SRC_LITERAL;
1582 }
1583 else
1584 {
1585 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1586 source_index, pSource->rtype);
1587 return GL_FALSE;
1588 }
1589 }
1590
1591 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1592 {
1593 src_rel = SQ_ABSOLUTE;
1594 }
1595 else
1596 {
1597 src_rel = SQ_RELATIVE;
1598 }
1599
1600 switch (channel_swizzle)
1601 {
1602 case SQ_SEL_X:
1603 src_chan = SQ_CHAN_X;
1604 break;
1605 case SQ_SEL_Y:
1606 src_chan = SQ_CHAN_Y;
1607 break;
1608 case SQ_SEL_Z:
1609 src_chan = SQ_CHAN_Z;
1610 break;
1611 case SQ_SEL_W:
1612 src_chan = SQ_CHAN_W;
1613 break;
1614 case SQ_SEL_0:
1615 case SQ_SEL_1:
1616 // Does not matter since src_sel controls
1617 src_chan = SQ_CHAN_X;
1618 break;
1619 default:
1620 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1621 return GL_FALSE;
1622 break;
1623 }
1624
1625 switch (scalar_channel_index)
1626 {
1627 case 0: src_neg = pSource->negx; break;
1628 case 1: src_neg = pSource->negy; break;
1629 case 2: src_neg = pSource->negz; break;
1630 case 3: src_neg = pSource->negw; break;
1631 default: src_neg = 0; break;
1632 }
1633
1634 switch (source_index)
1635 {
1636 case 0:
1637 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1638 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1639 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1640 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1641 break;
1642 case 1:
1643 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1644 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1645 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1646 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1647 break;
1648 case 2:
1649 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1650 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1651 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1652 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1653 break;
1654 default:
1655 radeon_error("Only three sources allowed in ALU opcodes.\n");
1656 return GL_FALSE;
1657 break;
1658 }
1659
1660 return GL_TRUE;
1661 }
1662
1663 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1664 R700ALUInstruction* alu_instruction_ptr,
1665 GLuint contiguous_slots_needed)
1666 {
1667 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1668 {
1669 return GL_FALSE;
1670 }
1671
1672 if ( pAsm->alu_x_opcode != 0 ||
1673 pAsm->cf_current_alu_clause_ptr == NULL ||
1674 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1675 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1676 ) )
1677 {
1678
1679 //new cf inst for this clause
1680 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1681
1682 // link the new cf to cf segment
1683 if(NULL != pAsm->cf_current_alu_clause_ptr)
1684 {
1685 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1686 AddCFInstruction( pAsm->pR700Shader,
1687 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1688 }
1689 else
1690 {
1691 radeon_error("Could not allocate a new ALU CF instruction.\n");
1692 return GL_FALSE;
1693 }
1694
1695 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1696 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1697 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1698
1699 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1700 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1701 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1702
1703 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1704
1705 if(pAsm->alu_x_opcode != 0)
1706 {
1707 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = pAsm->alu_x_opcode;
1708 pAsm->alu_x_opcode = 0;
1709 }
1710 else
1711 {
1712 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1713 }
1714
1715 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1716
1717 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1718 }
1719 else
1720 {
1721 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1722 }
1723
1724 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1725 // set the whole_quad_mode for this clause
1726 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1727 {
1728 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1729 }
1730
1731 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1732 {
1733 alu_instruction_ptr->m_Word0.f.last = 1;
1734 }
1735
1736 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1737 {
1738 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1739 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1740 }
1741
1742 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1743
1744 return GL_TRUE;
1745 }
1746
1747 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1748 int source_index,
1749 BITS* psrc_sel,
1750 BITS* psrc_rel,
1751 BITS* psrc_chan,
1752 BITS* psrc_neg)
1753 {
1754 switch (source_index)
1755 {
1756 case 0:
1757 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1758 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1759 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1760 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1761 break;
1762
1763 case 1:
1764 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1765 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1766 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1767 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1768 break;
1769
1770 case 2:
1771 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1772 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1773 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1774 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1775 break;
1776 }
1777 }
1778
1779 int is_cfile(BITS sel)
1780 {
1781 if (sel > 255 && sel < 512)
1782 {
1783 return 1;
1784 }
1785 return 0;
1786 }
1787
1788 int is_const(BITS sel)
1789 {
1790 if (is_cfile(sel))
1791 {
1792 return 1;
1793 }
1794 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1795 {
1796 return 1;
1797 }
1798 return 0;
1799 }
1800
1801 int is_gpr(BITS sel)
1802 {
1803 if (sel >= 0 && sel < 128)
1804 {
1805 return 1;
1806 }
1807 return 0;
1808 }
1809
1810 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1811 SQ_ALU_VEC_120, //001
1812 SQ_ALU_VEC_102, //010
1813
1814 SQ_ALU_VEC_201, //011
1815 SQ_ALU_VEC_012, //100
1816 SQ_ALU_VEC_021, //101
1817
1818 SQ_ALU_VEC_012, //110
1819 SQ_ALU_VEC_012}; //111
1820
1821 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1822 SQ_ALU_SCL_122, //001
1823 SQ_ALU_SCL_122, //010
1824
1825 SQ_ALU_SCL_221, //011
1826 SQ_ALU_SCL_212, //100
1827 SQ_ALU_SCL_122, //101
1828
1829 SQ_ALU_SCL_122, //110
1830 SQ_ALU_SCL_122}; //111
1831
1832 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1833 GLuint sel,
1834 GLuint chan)
1835 {
1836 int res_match = (-1);
1837 int res_empty = (-1);
1838
1839 GLint res;
1840
1841 for (res=3; res>=0; res--)
1842 {
1843 if(pAsm->hw_cfile_addr[ res] < 0)
1844 {
1845 res_empty = res;
1846 }
1847 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1848 &&
1849 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1850 {
1851 res_match = res;
1852 }
1853 }
1854
1855 if(res_match >= 0)
1856 {
1857 // Read for this scalar component already reserved, nothing to do here.
1858 ;
1859 }
1860 else if(res_empty >= 0)
1861 {
1862 pAsm->hw_cfile_addr[ res_empty ] = sel;
1863 pAsm->hw_cfile_chan[ res_empty ] = chan;
1864 }
1865 else
1866 {
1867 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1868 return GL_FALSE;
1869 }
1870 return GL_TRUE;
1871 }
1872
1873 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1874 {
1875 if(pAsm->hw_gpr[cycle][chan] < 0)
1876 {
1877 pAsm->hw_gpr[cycle][chan] = sel;
1878 }
1879 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1880 {
1881 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1882 return GL_FALSE;
1883 }
1884
1885 return GL_TRUE;
1886 }
1887
1888 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1889 {
1890 switch (swiz)
1891 {
1892 case SQ_ALU_SCL_210:
1893 {
1894 int table[3] = {2, 1, 0};
1895 *pCycle = table[sel];
1896 return GL_TRUE;
1897 }
1898 break;
1899 case SQ_ALU_SCL_122:
1900 {
1901 int table[3] = {1, 2, 2};
1902 *pCycle = table[sel];
1903 return GL_TRUE;
1904 }
1905 break;
1906 case SQ_ALU_SCL_212:
1907 {
1908 int table[3] = {2, 1, 2};
1909 *pCycle = table[sel];
1910 return GL_TRUE;
1911 }
1912 break;
1913 case SQ_ALU_SCL_221:
1914 {
1915 int table[3] = {2, 2, 1};
1916 *pCycle = table[sel];
1917 return GL_TRUE;
1918 }
1919 break;
1920 default:
1921 radeon_error("Bad Scalar bank swizzle value\n");
1922 break;
1923 }
1924
1925 return GL_FALSE;
1926 }
1927
1928 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1929 {
1930 switch (swiz)
1931 {
1932 case SQ_ALU_VEC_012:
1933 {
1934 int table[3] = {0, 1, 2};
1935 *pCycle = table[sel];
1936 }
1937 break;
1938 case SQ_ALU_VEC_021:
1939 {
1940 int table[3] = {0, 2, 1};
1941 *pCycle = table[sel];
1942 }
1943 break;
1944 case SQ_ALU_VEC_120:
1945 {
1946 int table[3] = {1, 2, 0};
1947 *pCycle = table[sel];
1948 }
1949 break;
1950 case SQ_ALU_VEC_102:
1951 {
1952 int table[3] = {1, 0, 2};
1953 *pCycle = table[sel];
1954 }
1955 break;
1956 case SQ_ALU_VEC_201:
1957 {
1958 int table[3] = {2, 0, 1};
1959 *pCycle = table[sel];
1960 }
1961 break;
1962 case SQ_ALU_VEC_210:
1963 {
1964 int table[3] = {2, 1, 0};
1965 *pCycle = table[sel];
1966 }
1967 break;
1968 default:
1969 radeon_error("Bad Vec bank swizzle value\n");
1970 return GL_FALSE;
1971 break;
1972 }
1973
1974 return GL_TRUE;
1975 }
1976
1977 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1978 R700ALUInstruction* alu_instruction_ptr)
1979 {
1980 GLuint cycle;
1981 GLuint bank_swizzle;
1982 GLuint const_count = 0;
1983
1984 BITS sel;
1985 BITS chan;
1986 BITS rel;
1987 BITS neg;
1988
1989 GLuint src;
1990
1991 BITS src_sel [3] = {0,0,0};
1992 BITS src_chan[3] = {0,0,0};
1993 BITS src_rel [3] = {0,0,0};
1994 BITS src_neg [3] = {0,0,0};
1995
1996 GLuint swizzle_key;
1997
1998 GLuint number_of_operands = r700GetNumOperands(pAsm);
1999
2000 for (src=0; src<number_of_operands; src++)
2001 {
2002 get_src_properties(alu_instruction_ptr,
2003 src,
2004 &(src_sel[src]),
2005 &(src_rel[src]),
2006 &(src_chan[src]),
2007 &(src_neg[src]) );
2008 }
2009
2010
2011 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2012 (is_const( src_sel[1] ) ? 2 : 0) +
2013 (is_const( src_sel[2] ) ? 1 : 0) );
2014
2015 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
2016
2017 for (src=0; src<number_of_operands; src++)
2018 {
2019 sel = src_sel [src];
2020 chan = src_chan[src];
2021 rel = src_rel [src];
2022 neg = src_neg [src];
2023
2024 if (is_const( sel ))
2025 {
2026 // Any constant, including literal and inline constants
2027 const_count++;
2028
2029 if (is_cfile( sel ))
2030 {
2031 reserve_cfile(pAsm, sel, chan);
2032 }
2033
2034 }
2035 }
2036
2037 for (src=0; src<number_of_operands; src++)
2038 {
2039 sel = src_sel [src];
2040 chan = src_chan[src];
2041 rel = src_rel [src];
2042 neg = src_neg [src];
2043
2044 if( is_gpr(sel) )
2045 {
2046 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2047
2048 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
2049 {
2050 return GL_FALSE;
2051 }
2052
2053 if(cycle < const_count)
2054 {
2055 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2056 {
2057 return GL_FALSE;
2058 }
2059 }
2060 }
2061 }
2062
2063 return GL_TRUE;
2064 }
2065
2066 GLboolean check_vector(r700_AssemblerBase* pAsm,
2067 R700ALUInstruction* alu_instruction_ptr)
2068 {
2069 GLuint cycle;
2070 GLuint bank_swizzle;
2071 GLuint const_count = 0;
2072
2073 GLuint src;
2074
2075 BITS sel;
2076 BITS chan;
2077 BITS rel;
2078 BITS neg;
2079
2080 BITS src_sel [3] = {0,0,0};
2081 BITS src_chan[3] = {0,0,0};
2082 BITS src_rel [3] = {0,0,0};
2083 BITS src_neg [3] = {0,0,0};
2084
2085 GLuint swizzle_key;
2086
2087 GLuint number_of_operands = r700GetNumOperands(pAsm);
2088
2089 for (src=0; src<number_of_operands; src++)
2090 {
2091 get_src_properties(alu_instruction_ptr,
2092 src,
2093 &(src_sel[src]),
2094 &(src_rel[src]),
2095 &(src_chan[src]),
2096 &(src_neg[src]) );
2097 }
2098
2099
2100 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
2101 (is_const( src_sel[1] ) ? 2 : 0) +
2102 (is_const( src_sel[2] ) ? 1 : 0)
2103 );
2104
2105 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
2106
2107 for (src=0; src<number_of_operands; src++)
2108 {
2109 sel = src_sel [src];
2110 chan = src_chan[src];
2111 rel = src_rel [src];
2112 neg = src_neg [src];
2113
2114
2115 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
2116
2117 if( is_gpr(sel) )
2118 {
2119 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
2120 {
2121 return GL_FALSE;
2122 }
2123
2124 if ( (src == 1) &&
2125 (sel == src_sel[0]) &&
2126 (chan == src_chan[0]) )
2127 {
2128 }
2129 else
2130 {
2131 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
2132 {
2133 return GL_FALSE;
2134 }
2135 }
2136 }
2137 else if( is_const(sel) )
2138 {
2139 const_count++;
2140
2141 if( is_cfile(sel) )
2142 {
2143 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
2144 {
2145 return GL_FALSE;
2146 }
2147 }
2148 }
2149 }
2150
2151 return GL_TRUE;
2152 }
2153
2154 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
2155 {
2156 GLuint number_of_scalar_operations;
2157 GLboolean is_single_scalar_operation;
2158 GLuint scalar_channel_index;
2159
2160 PVSSRC * pcurrent_source;
2161 int current_source_index;
2162 GLuint contiguous_slots_needed;
2163
2164 GLuint uNumSrc = r700GetNumOperands(pAsm);
2165 //GLuint channel_swizzle, j;
2166 //GLuint chan_counter[4] = {0, 0, 0, 0};
2167 //PVSSRC * pSource[3];
2168 GLboolean bSplitInst = GL_FALSE;
2169
2170 if (1 == pAsm->D.dst.math)
2171 {
2172 is_single_scalar_operation = GL_TRUE;
2173 number_of_scalar_operations = 1;
2174 }
2175 else
2176 {
2177 is_single_scalar_operation = GL_FALSE;
2178 number_of_scalar_operations = 4;
2179
2180 /* current assembler doesn't do more than 1 register per source */
2181 #if 0
2182 /* check read port, only very preliminary algorithm, not count in
2183 src0/1 same comp case and prev slot repeat case; also not count relative
2184 addressing. TODO: improve performance. */
2185 for(j=0; j<uNumSrc; j++)
2186 {
2187 pSource[j] = &(pAsm->S[j].src);
2188 }
2189 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
2190 {
2191 for(j=0; j<uNumSrc; j++)
2192 {
2193 switch (scalar_channel_index)
2194 {
2195 case 0: channel_swizzle = pSource[j]->swizzlex; break;
2196 case 1: channel_swizzle = pSource[j]->swizzley; break;
2197 case 2: channel_swizzle = pSource[j]->swizzlez; break;
2198 case 3: channel_swizzle = pSource[j]->swizzlew; break;
2199 default: channel_swizzle = SQ_SEL_MASK; break;
2200 }
2201 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2202 (pSource[j]->rtype == SRC_REG_INPUT))
2203 && (channel_swizzle <= SQ_SEL_W) )
2204 {
2205 chan_counter[channel_swizzle]++;
2206 }
2207 }
2208 }
2209 if( (chan_counter[SQ_SEL_X] > 3)
2210 || (chan_counter[SQ_SEL_Y] > 3)
2211 || (chan_counter[SQ_SEL_Z] > 3)
2212 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2213 {
2214 bSplitInst = GL_TRUE;
2215 }
2216 #endif
2217 }
2218
2219 contiguous_slots_needed = 0;
2220
2221 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2222 {
2223 contiguous_slots_needed = 4;
2224 }
2225
2226 initialize(pAsm);
2227
2228 for (scalar_channel_index=0;
2229 scalar_channel_index < number_of_scalar_operations;
2230 scalar_channel_index++)
2231 {
2232 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2233 if (alu_instruction_ptr == NULL)
2234 {
2235 return GL_FALSE;
2236 }
2237 Init_R700ALUInstruction(alu_instruction_ptr);
2238
2239 //src 0
2240 current_source_index = 0;
2241 pcurrent_source = &(pAsm->S[0].src);
2242
2243 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2244 current_source_index,
2245 pcurrent_source,
2246 scalar_channel_index) )
2247 {
2248 return GL_FALSE;
2249 }
2250
2251 if (uNumSrc > 1)
2252 {
2253 // Process source 1
2254 current_source_index = 1;
2255 pcurrent_source = &(pAsm->S[current_source_index].src);
2256
2257 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2258 current_source_index,
2259 pcurrent_source,
2260 scalar_channel_index) )
2261 {
2262 return GL_FALSE;
2263 }
2264 }
2265
2266 //other bits
2267 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_AR_X;
2268
2269 if( (is_single_scalar_operation == GL_TRUE)
2270 || (GL_TRUE == bSplitInst) )
2271 {
2272 alu_instruction_ptr->m_Word0.f.last = 1;
2273 }
2274 else
2275 {
2276 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2277 }
2278
2279 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2280 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2281 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2282
2283 // dst
2284 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2285 (pAsm->D.dst.rtype == DST_REG_OUT) )
2286 {
2287 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2288 }
2289 else
2290 {
2291 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2292 return GL_FALSE;
2293 }
2294
2295 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2296
2297 if ( is_single_scalar_operation == GL_TRUE )
2298 {
2299 // Override scalar_channel_index since only one scalar value will be written
2300 if(pAsm->D.dst.writex)
2301 {
2302 scalar_channel_index = 0;
2303 }
2304 else if(pAsm->D.dst.writey)
2305 {
2306 scalar_channel_index = 1;
2307 }
2308 else if(pAsm->D.dst.writez)
2309 {
2310 scalar_channel_index = 2;
2311 }
2312 else if(pAsm->D.dst.writew)
2313 {
2314 scalar_channel_index = 3;
2315 }
2316 }
2317
2318 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2319
2320 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2321
2322 if (pAsm->D.dst.op3)
2323 {
2324 //op3
2325
2326 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2327
2328 //There's 3rd src for op3
2329 current_source_index = 2;
2330 pcurrent_source = &(pAsm->S[current_source_index].src);
2331
2332 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2333 current_source_index,
2334 pcurrent_source,
2335 scalar_channel_index) )
2336 {
2337 return GL_FALSE;
2338 }
2339 }
2340 else
2341 {
2342 //op2
2343 if (pAsm->bR6xx)
2344 {
2345 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2346
2347 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2348 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2349
2350 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2351 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2352 switch (scalar_channel_index)
2353 {
2354 case 0:
2355 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2356 break;
2357 case 1:
2358 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2359 break;
2360 case 2:
2361 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2362 break;
2363 case 3:
2364 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2365 break;
2366 default:
2367 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2368 break;
2369 }
2370 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2371 }
2372 else
2373 {
2374 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2375
2376 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2377 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2378
2379 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2380 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2381 switch (scalar_channel_index)
2382 {
2383 case 0:
2384 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2385 break;
2386 case 1:
2387 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2388 break;
2389 case 2:
2390 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2391 break;
2392 case 3:
2393 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2394 break;
2395 default:
2396 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2397 break;
2398 }
2399 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2400 }
2401 }
2402
2403 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2404 {
2405 return GL_FALSE;
2406 }
2407
2408 /*
2409 * Judge the type of current instruction, is it vector or scalar
2410 * instruction.
2411 */
2412 if (is_single_scalar_operation)
2413 {
2414 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2415 {
2416 return GL_FALSE;
2417 }
2418 }
2419 else
2420 {
2421 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2422 {
2423 return 1;
2424 }
2425 }
2426
2427 contiguous_slots_needed = 0;
2428 }
2429
2430 return GL_TRUE;
2431 }
2432
2433 GLboolean assemble_alu_instruction2(r700_AssemblerBase *pAsm)
2434 {
2435 GLuint number_of_scalar_operations;
2436 GLboolean is_single_scalar_operation;
2437 GLuint scalar_channel_index;
2438
2439 PVSSRC * pcurrent_source;
2440 int current_source_index;
2441 GLuint contiguous_slots_needed;
2442
2443 GLuint uNumSrc = r700GetNumOperands(pAsm);
2444
2445 GLboolean bSplitInst = GL_FALSE;
2446
2447 if (1 == pAsm->D.dst.math)
2448 {
2449 is_single_scalar_operation = GL_TRUE;
2450 number_of_scalar_operations = 1;
2451 }
2452 else
2453 {
2454 is_single_scalar_operation = GL_FALSE;
2455 number_of_scalar_operations = 4;
2456 }
2457
2458 contiguous_slots_needed = 0;
2459
2460 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2461 {
2462 contiguous_slots_needed = 4;
2463 }
2464
2465 initialize(pAsm);
2466
2467 for (scalar_channel_index=0;
2468 scalar_channel_index < number_of_scalar_operations;
2469 scalar_channel_index++)
2470 {
2471 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2472 if (alu_instruction_ptr == NULL)
2473 {
2474 return GL_FALSE;
2475 }
2476 Init_R700ALUInstruction(alu_instruction_ptr);
2477
2478 //src 0
2479 current_source_index = 0;
2480 pcurrent_source = &(pAsm->S[0].src);
2481
2482 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2483 current_source_index,
2484 pcurrent_source,
2485 scalar_channel_index) )
2486 {
2487 return GL_FALSE;
2488 }
2489
2490 if (uNumSrc > 1)
2491 {
2492 // Process source 1
2493 current_source_index = 1;
2494 pcurrent_source = &(pAsm->S[current_source_index].src);
2495
2496 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2497 current_source_index,
2498 pcurrent_source,
2499 scalar_channel_index) )
2500 {
2501 return GL_FALSE;
2502 }
2503 }
2504
2505 //other bits
2506 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2507
2508 if( (is_single_scalar_operation == GL_TRUE)
2509 || (GL_TRUE == bSplitInst) )
2510 {
2511 alu_instruction_ptr->m_Word0.f.last = 1;
2512 }
2513 else
2514 {
2515 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2516 }
2517
2518 alu_instruction_ptr->m_Word0.f.pred_sel = (pAsm->D.dst.pred_inv > 0) ? 1 : 0;
2519 if(1 == pAsm->D.dst.predicated)
2520 {
2521 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2522 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2523 }
2524 else
2525 {
2526 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2527 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2528 }
2529
2530 // dst
2531 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2532 (pAsm->D.dst.rtype == DST_REG_OUT) )
2533 {
2534 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2535 }
2536 else
2537 {
2538 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2539 return GL_FALSE;
2540 }
2541
2542 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2543
2544 if ( is_single_scalar_operation == GL_TRUE )
2545 {
2546 // Override scalar_channel_index since only one scalar value will be written
2547 if(pAsm->D.dst.writex)
2548 {
2549 scalar_channel_index = 0;
2550 }
2551 else if(pAsm->D.dst.writey)
2552 {
2553 scalar_channel_index = 1;
2554 }
2555 else if(pAsm->D.dst.writez)
2556 {
2557 scalar_channel_index = 2;
2558 }
2559 else if(pAsm->D.dst.writew)
2560 {
2561 scalar_channel_index = 3;
2562 }
2563 }
2564
2565 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2566
2567 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2568
2569 if (pAsm->D.dst.op3)
2570 {
2571 //op3
2572
2573 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2574
2575 //There's 3rd src for op3
2576 current_source_index = 2;
2577 pcurrent_source = &(pAsm->S[current_source_index].src);
2578
2579 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2580 current_source_index,
2581 pcurrent_source,
2582 scalar_channel_index) )
2583 {
2584 return GL_FALSE;
2585 }
2586 }
2587 else
2588 {
2589 //op2
2590 if (pAsm->bR6xx)
2591 {
2592 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2593
2594 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2595 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2596
2597 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2598 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2599 switch (scalar_channel_index)
2600 {
2601 case 0:
2602 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2603 break;
2604 case 1:
2605 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2606 break;
2607 case 2:
2608 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2609 break;
2610 case 3:
2611 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2612 break;
2613 default:
2614 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2615 break;
2616 }
2617 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2618 }
2619 else
2620 {
2621 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2622
2623 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2624 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2625
2626 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2627 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2628 switch (scalar_channel_index)
2629 {
2630 case 0:
2631 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2632 break;
2633 case 1:
2634 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2635 break;
2636 case 2:
2637 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2638 break;
2639 case 3:
2640 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2641 break;
2642 default:
2643 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2644 break;
2645 }
2646 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2647 }
2648 }
2649
2650 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2651 {
2652 return GL_FALSE;
2653 }
2654
2655 /*
2656 * Judge the type of current instruction, is it vector or scalar
2657 * instruction.
2658 */
2659 if (is_single_scalar_operation)
2660 {
2661 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2662 {
2663 return GL_FALSE;
2664 }
2665 }
2666 else
2667 {
2668 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2669 {
2670 return 1;
2671 }
2672 }
2673
2674 contiguous_slots_needed = 0;
2675 }
2676
2677 return GL_TRUE;
2678 }
2679
2680 GLboolean assemble_alu_instruction_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
2681 {
2682 R700ALUInstruction * alu_instruction_ptr;
2683 R700ALUInstructionHalfLiteral * alu_instruction_ptr_hl;
2684 R700ALUInstructionFullLiteral * alu_instruction_ptr_fl;
2685
2686 GLuint number_of_scalar_operations;
2687 GLboolean is_single_scalar_operation;
2688 GLuint scalar_channel_index;
2689
2690 GLuint contiguous_slots_needed;
2691 GLuint lastInstruction;
2692 GLuint not_masked[4];
2693
2694 GLuint uNumSrc = r700GetNumOperands(pAsm);
2695
2696 GLboolean bSplitInst = GL_FALSE;
2697
2698 number_of_scalar_operations = 0;
2699 contiguous_slots_needed = 0;
2700
2701 if(1 == pAsm->D.dst.writew)
2702 {
2703 lastInstruction = 3;
2704 number_of_scalar_operations++;
2705 not_masked[3] = 1;
2706 }
2707 else
2708 {
2709 not_masked[3] = 0;
2710 }
2711 if(1 == pAsm->D.dst.writez)
2712 {
2713 lastInstruction = 2;
2714 number_of_scalar_operations++;
2715 not_masked[2] = 1;
2716 }
2717 else
2718 {
2719 not_masked[2] = 0;
2720 }
2721 if(1 == pAsm->D.dst.writey)
2722 {
2723 lastInstruction = 1;
2724 number_of_scalar_operations++;
2725 not_masked[1] = 1;
2726 }
2727 else
2728 {
2729 not_masked[1] = 0;
2730 }
2731 if(1 == pAsm->D.dst.writex)
2732 {
2733 lastInstruction = 0;
2734 number_of_scalar_operations++;
2735 not_masked[0] = 1;
2736 }
2737 else
2738 {
2739 not_masked[0] = 0;
2740 }
2741
2742 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2743 {
2744 contiguous_slots_needed = 4;
2745 }
2746 else
2747 {
2748 contiguous_slots_needed = number_of_scalar_operations;
2749 }
2750
2751 if(1 == pAsm->D2.dst2.literal)
2752 {
2753 contiguous_slots_needed += 1;
2754 }
2755 else if(2 == pAsm->D2.dst2.literal)
2756 {
2757 contiguous_slots_needed += 2;
2758 }
2759
2760 initialize(pAsm);
2761
2762 for (scalar_channel_index=0; scalar_channel_index < 4; scalar_channel_index++)
2763 {
2764 if(0 == not_masked[scalar_channel_index])
2765 {
2766 continue;
2767 }
2768
2769 if(scalar_channel_index == lastInstruction)
2770 {
2771 switch (pAsm->D2.dst2.literal)
2772 {
2773 case 0:
2774 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2775 if (alu_instruction_ptr == NULL)
2776 {
2777 return GL_FALSE;
2778 }
2779 Init_R700ALUInstruction(alu_instruction_ptr);
2780 break;
2781 case 1:
2782 alu_instruction_ptr_hl = (R700ALUInstructionHalfLiteral*) CALLOC_STRUCT(R700ALUInstructionHalfLiteral);
2783 if (alu_instruction_ptr_hl == NULL)
2784 {
2785 return GL_FALSE;
2786 }
2787 Init_R700ALUInstructionHalfLiteral(alu_instruction_ptr_hl, pLiteral[0], pLiteral[1]);
2788 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_hl;
2789 break;
2790 case 2:
2791 alu_instruction_ptr_fl = (R700ALUInstructionFullLiteral*) CALLOC_STRUCT(R700ALUInstructionFullLiteral);
2792 if (alu_instruction_ptr_fl == NULL)
2793 {
2794 return GL_FALSE;
2795 }
2796 Init_R700ALUInstructionFullLiteral(alu_instruction_ptr_fl, pLiteral[0], pLiteral[1], pLiteral[2], pLiteral[3]);
2797 alu_instruction_ptr = (R700ALUInstruction*)alu_instruction_ptr_fl;
2798 break;
2799 default:
2800 break;
2801 };
2802 }
2803 else
2804 {
2805 alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2806 if (alu_instruction_ptr == NULL)
2807 {
2808 return GL_FALSE;
2809 }
2810 Init_R700ALUInstruction(alu_instruction_ptr);
2811 }
2812
2813 //src 0
2814 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2815 0,
2816 &(pAsm->S[0].src),
2817 scalar_channel_index) )
2818 {
2819 return GL_FALSE;
2820 }
2821
2822 if (uNumSrc > 1)
2823 {
2824 // Process source 1
2825 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2826 1,
2827 &(pAsm->S[1].src),
2828 scalar_channel_index) )
2829 {
2830 return GL_FALSE;
2831 }
2832 }
2833
2834 //other bits
2835 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2836
2837 if(scalar_channel_index == lastInstruction)
2838 {
2839 alu_instruction_ptr->m_Word0.f.last = 1;
2840 }
2841
2842 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2843 if(1 == pAsm->D.dst.predicated)
2844 {
2845 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x1;
2846 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x1;
2847 }
2848 else
2849 {
2850 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0;
2851 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0;
2852 }
2853
2854 // dst
2855 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2856 (pAsm->D.dst.rtype == DST_REG_OUT) )
2857 {
2858 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2859 }
2860 else
2861 {
2862 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2863 return GL_FALSE;
2864 }
2865
2866 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2867
2868 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2869
2870 alu_instruction_ptr->m_Word1.f.clamp = pAsm->D2.dst2.SaturateMode;
2871
2872 if (pAsm->D.dst.op3)
2873 {
2874 //op3
2875 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2876
2877 //There's 3rd src for op3
2878 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2879 2,
2880 &(pAsm->S[2].src),
2881 scalar_channel_index) )
2882 {
2883 return GL_FALSE;
2884 }
2885 }
2886 else
2887 {
2888 //op2
2889 if (pAsm->bR6xx)
2890 {
2891 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2892 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2893 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2894 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1;
2895 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2896 }
2897 else
2898 {
2899 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2900 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2901 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2902 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1;
2903 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2904 }
2905 }
2906
2907 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2908 {
2909 return GL_FALSE;
2910 }
2911
2912 if (1 == number_of_scalar_operations)
2913 {
2914 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2915 {
2916 return GL_FALSE;
2917 }
2918 }
2919 else
2920 {
2921 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2922 {
2923 return GL_FALSE;
2924 }
2925 }
2926
2927 contiguous_slots_needed -= 2;
2928 }
2929
2930 return GL_TRUE;
2931 }
2932
2933 GLboolean next_ins(r700_AssemblerBase *pAsm)
2934 {
2935 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2936
2937 if( GL_TRUE == pAsm->is_tex )
2938 {
2939 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2940 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2941 {
2942 radeon_error("Error assembling TEX instruction\n");
2943 return GL_FALSE;
2944 }
2945 } else {
2946 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2947 {
2948 radeon_error("Error assembling TEX instruction\n");
2949 return GL_FALSE;
2950 }
2951 }
2952 }
2953 else
2954 { //ALU
2955 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2956 {
2957 radeon_error("Error assembling ALU instruction\n");
2958 return GL_FALSE;
2959 }
2960 }
2961
2962 if(pAsm->D.dst.rtype == DST_REG_OUT)
2963 {
2964 if(pAsm->D.dst.op3)
2965 {
2966 // There is no mask for OP3 instructions, so all channels are written
2967 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2968 }
2969 else
2970 {
2971 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2972 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2973 }
2974 }
2975
2976 //reset for next inst.
2977 pAsm->D.bits = 0;
2978 pAsm->D2.bits = 0;
2979 pAsm->S[0].bits = 0;
2980 pAsm->S[1].bits = 0;
2981 pAsm->S[2].bits = 0;
2982 pAsm->is_tex = GL_FALSE;
2983 pAsm->need_tex_barrier = GL_FALSE;
2984
2985 return GL_TRUE;
2986 }
2987
2988 GLboolean next_ins2(r700_AssemblerBase *pAsm)
2989 {
2990 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2991
2992 //ALU
2993 if( GL_FALSE == assemble_alu_instruction2(pAsm) )
2994 {
2995 radeon_error("Error assembling ALU instruction\n");
2996 return GL_FALSE;
2997 }
2998
2999 if(pAsm->D.dst.rtype == DST_REG_OUT)
3000 {
3001 if(pAsm->D.dst.op3)
3002 {
3003 // There is no mask for OP3 instructions, so all channels are written
3004 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
3005 }
3006 else
3007 {
3008 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
3009 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
3010 }
3011 }
3012
3013 //reset for next inst.
3014 pAsm->D.bits = 0;
3015 pAsm->D2.bits = 0;
3016 pAsm->S[0].bits = 0;
3017 pAsm->S[1].bits = 0;
3018 pAsm->S[2].bits = 0;
3019 pAsm->is_tex = GL_FALSE;
3020 pAsm->need_tex_barrier = GL_FALSE;
3021
3022 pAsm->D2.bits = 0;
3023
3024 return GL_TRUE;
3025 }
3026
3027 /* not work yet */
3028 GLboolean next_ins_literal(r700_AssemblerBase *pAsm, GLfloat * pLiteral)
3029 {
3030 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3031
3032 //ALU
3033 if( GL_FALSE == assemble_alu_instruction_literal(pAsm, pLiteral) )
3034 {
3035 radeon_error("Error assembling ALU instruction\n");
3036 return GL_FALSE;
3037 }
3038
3039 //reset for next inst.
3040 pAsm->D.bits = 0;
3041 pAsm->D2.bits = 0;
3042 pAsm->S[0].bits = 0;
3043 pAsm->S[1].bits = 0;
3044 pAsm->S[2].bits = 0;
3045 pAsm->is_tex = GL_FALSE;
3046 pAsm->need_tex_barrier = GL_FALSE;
3047 return GL_TRUE;
3048 }
3049
3050 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
3051 {
3052 BITS tmp;
3053
3054 checkop1(pAsm);
3055
3056 tmp = gethelpr(pAsm);
3057
3058 // opcode tmp.x, a.x
3059 // MOV dst, tmp.x
3060
3061 pAsm->D.dst.opcode = opcode;
3062 pAsm->D.dst.math = 1;
3063
3064 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3065 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3066 pAsm->D.dst.reg = tmp;
3067 pAsm->D.dst.writex = 1;
3068
3069 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3070 {
3071 return GL_FALSE;
3072 }
3073
3074 if ( GL_FALSE == next_ins(pAsm) )
3075 {
3076 return GL_FALSE;
3077 }
3078
3079 // Now replicate result to all necessary channels in destination
3080 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3081
3082 if( GL_FALSE == assemble_dst(pAsm) )
3083 {
3084 return GL_FALSE;
3085 }
3086
3087 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3088 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3089 pAsm->S[0].src.reg = tmp;
3090
3091 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3092 noneg_PVSSRC(&(pAsm->S[0].src));
3093
3094 if( GL_FALSE == next_ins(pAsm) )
3095 {
3096 return GL_FALSE;
3097 }
3098
3099 return GL_TRUE;
3100 }
3101
3102 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
3103 {
3104 checkop1(pAsm);
3105
3106 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3107
3108 if( GL_FALSE == assemble_dst(pAsm) )
3109 {
3110 return GL_FALSE;
3111 }
3112 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3113 {
3114 return GL_FALSE;
3115 }
3116
3117 pAsm->S[1].bits = pAsm->S[0].bits;
3118 flipneg_PVSSRC(&(pAsm->S[1].src));
3119
3120 if ( GL_FALSE == next_ins(pAsm) )
3121 {
3122 return GL_FALSE;
3123 }
3124
3125 return GL_TRUE;
3126 }
3127
3128 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
3129 {
3130 if( GL_FALSE == checkop2(pAsm) )
3131 {
3132 return GL_FALSE;
3133 }
3134
3135 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3136
3137 if( GL_FALSE == assemble_dst(pAsm) )
3138 {
3139 return GL_FALSE;
3140 }
3141
3142 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3143 {
3144 return GL_FALSE;
3145 }
3146
3147 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3148 {
3149 return GL_FALSE;
3150 }
3151
3152 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
3153 {
3154 flipneg_PVSSRC(&(pAsm->S[1].src));
3155 }
3156
3157 if( GL_FALSE == next_ins(pAsm) )
3158 {
3159 return GL_FALSE;
3160 }
3161
3162 return GL_TRUE;
3163 }
3164
3165 GLboolean assemble_ARL(r700_AssemblerBase *pAsm)
3166 { /* TODO: ar values dont' persist between clauses */
3167 if( GL_FALSE == checkop1(pAsm) )
3168 {
3169 return GL_FALSE;
3170 }
3171
3172 pAsm->D.dst.opcode = SQ_OP2_INST_MOVA_FLOOR;
3173 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3174 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3175 pAsm->D.dst.reg = 0;
3176 pAsm->D.dst.writex = 0;
3177 pAsm->D.dst.writey = 0;
3178 pAsm->D.dst.writez = 0;
3179 pAsm->D.dst.writew = 0;
3180
3181 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3182 {
3183 return GL_FALSE;
3184 }
3185
3186 if( GL_FALSE == next_ins(pAsm) )
3187 {
3188 return GL_FALSE;
3189 }
3190
3191 return GL_TRUE;
3192 }
3193
3194 GLboolean assemble_BAD(char *opcode_str)
3195 {
3196 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
3197 return GL_FALSE;
3198 }
3199
3200 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
3201 {
3202 int tmp;
3203
3204 if( GL_FALSE == checkop3(pAsm) )
3205 {
3206 return GL_FALSE;
3207 }
3208
3209 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
3210 pAsm->D.dst.op3 = 1;
3211
3212 tmp = (-1);
3213
3214 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3215 {
3216 //OP3 has no support for write mask
3217 tmp = gethelpr(pAsm);
3218
3219 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3220 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3221 pAsm->D.dst.reg = tmp;
3222
3223 nomask_PVSDST(&(pAsm->D.dst));
3224 }
3225 else
3226 {
3227 if( GL_FALSE == assemble_dst(pAsm) )
3228 {
3229 return GL_FALSE;
3230 }
3231 }
3232
3233 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3234 {
3235 return GL_FALSE;
3236 }
3237
3238 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
3239 {
3240 return GL_FALSE;
3241 }
3242
3243 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
3244 {
3245 return GL_FALSE;
3246 }
3247
3248 if ( GL_FALSE == next_ins(pAsm) )
3249 {
3250 return GL_FALSE;
3251 }
3252
3253 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3254 {
3255 if( GL_FALSE == assemble_dst(pAsm) )
3256 {
3257 return GL_FALSE;
3258 }
3259
3260 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3261
3262 //tmp for source
3263 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3264 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3265 pAsm->S[0].src.reg = tmp;
3266
3267 noneg_PVSSRC(&(pAsm->S[0].src));
3268 noswizzle_PVSSRC(&(pAsm->S[0].src));
3269
3270 if( GL_FALSE == next_ins(pAsm) )
3271 {
3272 return GL_FALSE;
3273 }
3274 }
3275
3276 return GL_TRUE;
3277 }
3278
3279 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
3280 {
3281 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
3282 }
3283
3284 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
3285 {
3286 if( GL_FALSE == checkop2(pAsm) )
3287 {
3288 return GL_FALSE;
3289 }
3290
3291 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
3292
3293 if( GL_FALSE == assemble_dst(pAsm) )
3294 {
3295 return GL_FALSE;
3296 }
3297
3298 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3299 {
3300 return GL_FALSE;
3301 }
3302
3303 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3304 {
3305 return GL_FALSE;
3306 }
3307
3308 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
3309 {
3310 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
3311 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
3312 }
3313 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
3314 {
3315 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3316 }
3317
3318 if ( GL_FALSE == next_ins(pAsm) )
3319 {
3320 return GL_FALSE;
3321 }
3322
3323 return GL_TRUE;
3324 }
3325
3326 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
3327 {
3328 if( GL_FALSE == checkop2(pAsm) )
3329 {
3330 return GL_FALSE;
3331 }
3332
3333 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3334
3335 if( GL_FALSE == assemble_dst(pAsm) )
3336 {
3337 return GL_FALSE;
3338 }
3339
3340 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3341 {
3342 return GL_FALSE;
3343 }
3344
3345 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3346 {
3347 return GL_FALSE;
3348 }
3349
3350 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
3351 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
3352
3353 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
3354 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
3355
3356 if ( GL_FALSE == next_ins(pAsm) )
3357 {
3358 return GL_FALSE;
3359 }
3360
3361 return GL_TRUE;
3362 }
3363
3364 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
3365 {
3366 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
3367 }
3368
3369 GLboolean assemble_EXP(r700_AssemblerBase *pAsm)
3370 {
3371 BITS tmp;
3372
3373 checkop1(pAsm);
3374
3375 tmp = gethelpr(pAsm);
3376
3377 // FLOOR tmp.x, a.x
3378 // EX2 dst.x tmp.x
3379
3380 if (pAsm->pILInst->DstReg.WriteMask & 0x1) {
3381 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3382
3383 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3384 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3385 pAsm->D.dst.reg = tmp;
3386 pAsm->D.dst.writex = 1;
3387
3388 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3389 {
3390 return GL_FALSE;
3391 }
3392
3393 if( GL_FALSE == next_ins(pAsm) )
3394 {
3395 return GL_FALSE;
3396 }
3397
3398 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3399 pAsm->D.dst.math = 1;
3400
3401 if( GL_FALSE == assemble_dst(pAsm) )
3402 {
3403 return GL_FALSE;
3404 }
3405
3406 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3407
3408 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3409 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3410 pAsm->S[0].src.reg = tmp;
3411
3412 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3413 noneg_PVSSRC(&(pAsm->S[0].src));
3414
3415 if( GL_FALSE == next_ins(pAsm) )
3416 {
3417 return GL_FALSE;
3418 }
3419 }
3420
3421 // FRACT dst.y a.x
3422
3423 if ((pAsm->pILInst->DstReg.WriteMask >> 1) & 0x1) {
3424 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3425
3426 if( GL_FALSE == assemble_dst(pAsm) )
3427 {
3428 return GL_FALSE;
3429 }
3430
3431 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3432 {
3433 return GL_FALSE;
3434 }
3435
3436 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3437
3438 if( GL_FALSE == next_ins(pAsm) )
3439 {
3440 return GL_FALSE;
3441 }
3442 }
3443
3444 // EX2 dst.z, a.x
3445
3446 if ((pAsm->pILInst->DstReg.WriteMask >> 2) & 0x1) {
3447 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3448 pAsm->D.dst.math = 1;
3449
3450 if( GL_FALSE == assemble_dst(pAsm) )
3451 {
3452 return GL_FALSE;
3453 }
3454
3455 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3456 {
3457 return GL_FALSE;
3458 }
3459
3460 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3461
3462 if( GL_FALSE == next_ins(pAsm) )
3463 {
3464 return GL_FALSE;
3465 }
3466 }
3467
3468 // MOV dst.w 1.0
3469
3470 if ((pAsm->pILInst->DstReg.WriteMask >> 3) & 0x1) {
3471 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3472
3473 if( GL_FALSE == assemble_dst(pAsm) )
3474 {
3475 return GL_FALSE;
3476 }
3477
3478 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3479
3480 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3481 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3482 pAsm->S[0].src.reg = tmp;
3483
3484 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3485 noneg_PVSSRC(&(pAsm->S[0].src));
3486
3487 if( GL_FALSE == next_ins(pAsm) )
3488 {
3489 return GL_FALSE;
3490 }
3491 }
3492
3493 return GL_TRUE;
3494 }
3495
3496 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
3497 {
3498 checkop1(pAsm);
3499
3500 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3501
3502 if ( GL_FALSE == assemble_dst(pAsm) )
3503 {
3504 return GL_FALSE;
3505 }
3506
3507 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3508 {
3509 return GL_FALSE;
3510 }
3511
3512 if ( GL_FALSE == next_ins(pAsm) )
3513 {
3514 return GL_FALSE;
3515 }
3516
3517 return GL_TRUE;
3518 }
3519
3520 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
3521 {
3522 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
3523 }
3524
3525 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
3526 {
3527 checkop1(pAsm);
3528
3529 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
3530
3531 if ( GL_FALSE == assemble_dst(pAsm) )
3532 {
3533 return GL_FALSE;
3534 }
3535
3536 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
3537 {
3538 return GL_FALSE;
3539 }
3540
3541 if ( GL_FALSE == next_ins(pAsm) )
3542 {
3543 return GL_FALSE;
3544 }
3545
3546 return GL_TRUE;
3547 }
3548
3549 GLboolean assemble_KIL(r700_AssemblerBase *pAsm, GLuint opcode)
3550 {
3551 checkop2(pAsm);
3552
3553 pAsm->D.dst.opcode = opcode;
3554 pAsm->D.dst.math = 1;
3555
3556 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3557 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3558 pAsm->D.dst.reg = 0;
3559 pAsm->D.dst.writex = 0;
3560 pAsm->D.dst.writey = 0;
3561 pAsm->D.dst.writez = 0;
3562 pAsm->D.dst.writew = 0;
3563
3564 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3565 {
3566 return GL_FALSE;
3567 }
3568
3569 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3570 {
3571 return GL_FALSE;
3572 }
3573
3574 if ( GL_FALSE == next_ins2(pAsm) )
3575 {
3576 return GL_FALSE;
3577 }
3578
3579 /* Doc says KILL has to be last(end) ALU clause */
3580 pAsm->pR700Shader->killIsUsed = GL_TRUE;
3581 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
3582
3583 return GL_TRUE;
3584 }
3585
3586 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
3587 {
3588 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
3589 }
3590
3591 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
3592 {
3593 BITS tmp;
3594
3595 if( GL_FALSE == checkop3(pAsm) )
3596 {
3597 return GL_FALSE;
3598 }
3599
3600 tmp = gethelpr(pAsm);
3601
3602 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3603
3604 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3605 pAsm->D.dst.reg = tmp;
3606 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3607 nomask_PVSDST(&(pAsm->D.dst));
3608
3609
3610 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3611 {
3612 return GL_FALSE;
3613 }
3614
3615 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
3616 {
3617 return GL_FALSE;
3618 }
3619
3620 neg_PVSSRC(&(pAsm->S[1].src));
3621
3622 if( GL_FALSE == next_ins(pAsm) )
3623 {
3624 return GL_FALSE;
3625 }
3626
3627 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3628 pAsm->D.dst.op3 = 1;
3629
3630 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3631 pAsm->D.dst.reg = tmp;
3632 nomask_PVSDST(&(pAsm->D.dst));
3633 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3634
3635 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3636 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3637 pAsm->S[0].src.reg = tmp;
3638 noswizzle_PVSSRC(&(pAsm->S[0].src));
3639
3640
3641 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3642 {
3643 return GL_FALSE;
3644 }
3645 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3646 {
3647 return GL_FALSE;
3648 }
3649
3650 if( GL_FALSE == next_ins(pAsm) )
3651 {
3652 return GL_FALSE;
3653 }
3654
3655 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3656
3657 if( GL_FALSE == assemble_dst(pAsm) )
3658 {
3659 return GL_FALSE;
3660 }
3661
3662 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3663 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3664 pAsm->S[0].src.reg = tmp;
3665 noswizzle_PVSSRC(&(pAsm->S[0].src));
3666
3667 if( GL_FALSE == next_ins(pAsm) )
3668 {
3669 return GL_FALSE;
3670 }
3671
3672 return GL_TRUE;
3673 }
3674
3675 GLboolean assemble_LOG(r700_AssemblerBase *pAsm)
3676 {
3677 BITS tmp1, tmp2, tmp3;
3678
3679 checkop1(pAsm);
3680
3681 tmp1 = gethelpr(pAsm);
3682 tmp2 = gethelpr(pAsm);
3683 tmp3 = gethelpr(pAsm);
3684
3685 // FIXME: The hardware can do fabs() directly on input
3686 // elements, but the compiler doesn't have the
3687 // capability to use that.
3688
3689 // MAX tmp1.x, a.x, -a.x (fabs(a.x))
3690
3691 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3692
3693 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3694 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3695 pAsm->D.dst.reg = tmp1;
3696 pAsm->D.dst.writex = 1;
3697
3698 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3699 {
3700 return GL_FALSE;
3701 }
3702
3703 pAsm->S[1].bits = pAsm->S[0].bits;
3704 flipneg_PVSSRC(&(pAsm->S[1].src));
3705
3706 if ( GL_FALSE == next_ins(pAsm) )
3707 {
3708 return GL_FALSE;
3709 }
3710
3711 // Entire algo:
3712 //
3713 // LG2 tmp2.x, tmp1.x
3714 // FLOOR tmp3.x, tmp2.x
3715 // MOV dst.x, tmp3.x
3716 // ADD tmp3.x, tmp2.x, -tmp3.x
3717 // EX2 dst.y, tmp3.x
3718 // MOV dst.z, tmp2.x
3719 // MOV dst.w, 1.0
3720
3721 // LG2 tmp2.x, tmp1.x
3722 // FLOOR tmp3.x, tmp2.x
3723
3724 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3725 pAsm->D.dst.math = 1;
3726
3727 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3728 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3729 pAsm->D.dst.reg = tmp2;
3730 pAsm->D.dst.writex = 1;
3731
3732 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3733 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3734 pAsm->S[0].src.reg = tmp1;
3735
3736 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3737 noneg_PVSSRC(&(pAsm->S[0].src));
3738
3739 if( GL_FALSE == next_ins(pAsm) )
3740 {
3741 return GL_FALSE;
3742 }
3743
3744 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
3745
3746 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3747 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3748 pAsm->D.dst.reg = tmp3;
3749 pAsm->D.dst.writex = 1;
3750
3751 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3752 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3753 pAsm->S[0].src.reg = tmp2;
3754
3755 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3756 noneg_PVSSRC(&(pAsm->S[0].src));
3757
3758 if( GL_FALSE == next_ins(pAsm) )
3759 {
3760 return GL_FALSE;
3761 }
3762
3763 // MOV dst.x, tmp3.x
3764
3765 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3766
3767 if( GL_FALSE == assemble_dst(pAsm) )
3768 {
3769 return GL_FALSE;
3770 }
3771
3772 pAsm->D.dst.writey = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3773
3774 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3775 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3776 pAsm->S[0].src.reg = tmp3;
3777
3778 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3779 noneg_PVSSRC(&(pAsm->S[0].src));
3780
3781 if( GL_FALSE == next_ins(pAsm) )
3782 {
3783 return GL_FALSE;
3784 }
3785
3786 // ADD tmp3.x, tmp2.x, -tmp3.x
3787 // EX2 dst.y, tmp3.x
3788
3789 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3790
3791 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3792 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3793 pAsm->D.dst.reg = tmp3;
3794 pAsm->D.dst.writex = 1;
3795
3796 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3797 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3798 pAsm->S[0].src.reg = tmp2;
3799
3800 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3801 noneg_PVSSRC(&(pAsm->S[0].src));
3802
3803 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3804 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
3805 pAsm->S[1].src.reg = tmp3;
3806
3807 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_X);
3808 neg_PVSSRC(&(pAsm->S[1].src));
3809
3810 if( GL_FALSE == next_ins(pAsm) )
3811 {
3812 return GL_FALSE;
3813 }
3814
3815 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3816 pAsm->D.dst.math = 1;
3817
3818 if( GL_FALSE == assemble_dst(pAsm) )
3819 {
3820 return GL_FALSE;
3821 }
3822
3823 pAsm->D.dst.writex = pAsm->D.dst.writez = pAsm->D.dst.writew = 0;
3824
3825 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3826 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3827 pAsm->S[0].src.reg = tmp3;
3828
3829 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3830 noneg_PVSSRC(&(pAsm->S[0].src));
3831
3832 if( GL_FALSE == next_ins(pAsm) )
3833 {
3834 return GL_FALSE;
3835 }
3836
3837 // MOV dst.z, tmp2.x
3838
3839 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3840
3841 if( GL_FALSE == assemble_dst(pAsm) )
3842 {
3843 return GL_FALSE;
3844 }
3845
3846 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writew = 0;
3847
3848 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3849 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3850 pAsm->S[0].src.reg = tmp2;
3851
3852 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3853 noneg_PVSSRC(&(pAsm->S[0].src));
3854
3855 if( GL_FALSE == next_ins(pAsm) )
3856 {
3857 return GL_FALSE;
3858 }
3859
3860 // MOV dst.w 1.0
3861
3862 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3863
3864 if( GL_FALSE == assemble_dst(pAsm) )
3865 {
3866 return GL_FALSE;
3867 }
3868
3869 pAsm->D.dst.writex = pAsm->D.dst.writey = pAsm->D.dst.writez = 0;
3870
3871 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3872 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3873 pAsm->S[0].src.reg = tmp1;
3874
3875 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_1);
3876 noneg_PVSSRC(&(pAsm->S[0].src));
3877
3878 if( GL_FALSE == next_ins(pAsm) )
3879 {
3880 return GL_FALSE;
3881 }
3882
3883 return GL_TRUE;
3884 }
3885
3886 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
3887 {
3888 int tmp, ii;
3889 GLboolean bReplaceDst = GL_FALSE;
3890 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
3891
3892 if( GL_FALSE == checkop3(pAsm) )
3893 {
3894 return GL_FALSE;
3895 }
3896
3897 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3898 pAsm->D.dst.op3 = 1;
3899
3900 tmp = (-1);
3901
3902 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
3903 { /* TODO : more investigation on MAD src and dst using same register */
3904 for(ii=0; ii<3; ii++)
3905 {
3906 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
3907 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
3908 {
3909 bReplaceDst = GL_TRUE;
3910 break;
3911 }
3912 }
3913 }
3914 if(0xF != pILInst->DstReg.WriteMask)
3915 { /* OP3 has no support for write mask */
3916 bReplaceDst = GL_TRUE;
3917 }
3918
3919 if(GL_TRUE == bReplaceDst)
3920 {
3921 tmp = gethelpr(pAsm);
3922
3923 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3924 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3925 pAsm->D.dst.reg = tmp;
3926
3927 nomask_PVSDST(&(pAsm->D.dst));
3928 }
3929 else
3930 {
3931 if( GL_FALSE == assemble_dst(pAsm) )
3932 {
3933 return GL_FALSE;
3934 }
3935 }
3936
3937 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3938 {
3939 return GL_FALSE;
3940 }
3941
3942 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3943 {
3944 return GL_FALSE;
3945 }
3946
3947 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
3948 {
3949 return GL_FALSE;
3950 }
3951
3952 if ( GL_FALSE == next_ins(pAsm) )
3953 {
3954 return GL_FALSE;
3955 }
3956
3957 if (GL_TRUE == bReplaceDst)
3958 {
3959 if( GL_FALSE == assemble_dst(pAsm) )
3960 {
3961 return GL_FALSE;
3962 }
3963
3964 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3965
3966 //tmp for source
3967 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3968 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3969 pAsm->S[0].src.reg = tmp;
3970
3971 noneg_PVSSRC(&(pAsm->S[0].src));
3972 noswizzle_PVSSRC(&(pAsm->S[0].src));
3973
3974 if( GL_FALSE == next_ins(pAsm) )
3975 {
3976 return GL_FALSE;
3977 }
3978 }
3979
3980 return GL_TRUE;
3981 }
3982
3983 /* LIT dst, src */
3984 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
3985 {
3986 unsigned int dstReg;
3987 unsigned int dstType;
3988 unsigned int srcReg;
3989 unsigned int srcType;
3990 checkop1(pAsm);
3991 int tmp = gethelpr(pAsm);
3992
3993 if( GL_FALSE == assemble_dst(pAsm) )
3994 {
3995 return GL_FALSE;
3996 }
3997 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3998 {
3999 return GL_FALSE;
4000 }
4001 dstReg = pAsm->D.dst.reg;
4002 dstType = pAsm->D.dst.rtype;
4003 srcReg = pAsm->S[0].src.reg;
4004 srcType = pAsm->S[0].src.rtype;
4005
4006 /* dst.xw, <- 1.0 */
4007 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4008 pAsm->D.dst.rtype = dstType;
4009 pAsm->D.dst.reg = dstReg;
4010 pAsm->D.dst.writex = 1;
4011 pAsm->D.dst.writey = 0;
4012 pAsm->D.dst.writez = 0;
4013 pAsm->D.dst.writew = 1;
4014 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4015 pAsm->S[0].src.reg = tmp;
4016 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4017 noneg_PVSSRC(&(pAsm->S[0].src));
4018 pAsm->S[0].src.swizzlex = SQ_SEL_1;
4019 pAsm->S[0].src.swizzley = SQ_SEL_1;
4020 pAsm->S[0].src.swizzlez = SQ_SEL_1;
4021 pAsm->S[0].src.swizzlew = SQ_SEL_1;
4022 if( GL_FALSE == next_ins(pAsm) )
4023 {
4024 return GL_FALSE;
4025 }
4026
4027 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4028 {
4029 return GL_FALSE;
4030 }
4031
4032 /* dst.y = max(src.x, 0.0) */
4033 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4034 pAsm->D.dst.rtype = dstType;
4035 pAsm->D.dst.reg = dstReg;
4036 pAsm->D.dst.writex = 0;
4037 pAsm->D.dst.writey = 1;
4038 pAsm->D.dst.writez = 0;
4039 pAsm->D.dst.writew = 0;
4040 pAsm->S[0].src.rtype = srcType;
4041 pAsm->S[0].src.reg = srcReg;
4042 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4043 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4044 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4045 pAsm->S[1].src.reg = tmp;
4046 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4047 noneg_PVSSRC(&(pAsm->S[1].src));
4048 pAsm->S[1].src.swizzlex = SQ_SEL_0;
4049 pAsm->S[1].src.swizzley = SQ_SEL_0;
4050 pAsm->S[1].src.swizzlez = SQ_SEL_0;
4051 pAsm->S[1].src.swizzlew = SQ_SEL_0;
4052 if( GL_FALSE == next_ins(pAsm) )
4053 {
4054 return GL_FALSE;
4055 }
4056
4057 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4058 {
4059 return GL_FALSE;
4060 }
4061
4062 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
4063
4064 /* dst.z = log(src.y) */
4065 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
4066 pAsm->D.dst.math = 1;
4067 pAsm->D.dst.rtype = dstType;
4068 pAsm->D.dst.reg = dstReg;
4069 pAsm->D.dst.writex = 0;
4070 pAsm->D.dst.writey = 0;
4071 pAsm->D.dst.writez = 1;
4072 pAsm->D.dst.writew = 0;
4073 pAsm->S[0].src.rtype = srcType;
4074 pAsm->S[0].src.reg = srcReg;
4075 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4076 if( GL_FALSE == next_ins(pAsm) )
4077 {
4078 return GL_FALSE;
4079 }
4080
4081 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4082 {
4083 return GL_FALSE;
4084 }
4085
4086 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
4087 {
4088 return GL_FALSE;
4089 }
4090
4091 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4092
4093 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
4094
4095 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
4096 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
4097 pAsm->D.dst.math = 1;
4098 pAsm->D.dst.op3 = 1;
4099 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4100 pAsm->D.dst.reg = tmp;
4101 pAsm->D.dst.writex = 1;
4102 pAsm->D.dst.writey = 0;
4103 pAsm->D.dst.writez = 0;
4104 pAsm->D.dst.writew = 0;
4105
4106 pAsm->S[0].src.rtype = srcType;
4107 pAsm->S[0].src.reg = srcReg;
4108 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4109
4110 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4111 pAsm->S[1].src.reg = dstReg;
4112 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4113 noneg_PVSSRC(&(pAsm->S[1].src));
4114 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
4115 pAsm->S[1].src.swizzley = SQ_SEL_Z;
4116 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4117 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
4118
4119 pAsm->S[2].src.rtype = srcType;
4120 pAsm->S[2].src.reg = srcReg;
4121 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4122
4123 if( GL_FALSE == next_ins(pAsm) )
4124 {
4125 return GL_FALSE;
4126 }
4127
4128 /* dst.z = exp(tmp.x) */
4129 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4130 pAsm->D.dst.math = 1;
4131 pAsm->D.dst.rtype = dstType;
4132 pAsm->D.dst.reg = dstReg;
4133 pAsm->D.dst.writex = 0;
4134 pAsm->D.dst.writey = 0;
4135 pAsm->D.dst.writez = 1;
4136 pAsm->D.dst.writew = 0;
4137
4138 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4139 pAsm->S[0].src.reg = tmp;
4140 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4141 noneg_PVSSRC(&(pAsm->S[0].src));
4142 pAsm->S[0].src.swizzlex = SQ_SEL_X;
4143 pAsm->S[0].src.swizzley = SQ_SEL_X;
4144 pAsm->S[0].src.swizzlez = SQ_SEL_X;
4145 pAsm->S[0].src.swizzlew = SQ_SEL_X;
4146
4147 if( GL_FALSE == next_ins(pAsm) )
4148 {
4149 return GL_FALSE;
4150 }
4151
4152 return GL_TRUE;
4153 }
4154
4155 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
4156 {
4157 if( GL_FALSE == checkop2(pAsm) )
4158 {
4159 return GL_FALSE;
4160 }
4161
4162 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4163
4164 if( GL_FALSE == assemble_dst(pAsm) )
4165 {
4166 return GL_FALSE;
4167 }
4168
4169 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4170 {
4171 return GL_FALSE;
4172 }
4173
4174 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4175 {
4176 return GL_FALSE;
4177 }
4178
4179 if( GL_FALSE == next_ins(pAsm) )
4180 {
4181 return GL_FALSE;
4182 }
4183
4184 return GL_TRUE;
4185 }
4186
4187 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
4188 {
4189 if( GL_FALSE == checkop2(pAsm) )
4190 {
4191 return GL_FALSE;
4192 }
4193
4194 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
4195
4196 if( GL_FALSE == assemble_dst(pAsm) )
4197 {
4198 return GL_FALSE;
4199 }
4200
4201 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4202 {
4203 return GL_FALSE;
4204 }
4205
4206 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4207 {
4208 return GL_FALSE;
4209 }
4210
4211 if( GL_FALSE == next_ins(pAsm) )
4212 {
4213 return GL_FALSE;
4214 }
4215
4216 return GL_TRUE;
4217 }
4218
4219 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
4220 {
4221 checkop1(pAsm);
4222
4223 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4224
4225 if (GL_FALSE == assemble_dst(pAsm))
4226 {
4227 return GL_FALSE;
4228 }
4229
4230 if (GL_FALSE == assemble_src(pAsm, 0, -1))
4231 {
4232 return GL_FALSE;
4233 }
4234
4235 if ( GL_FALSE == next_ins(pAsm) )
4236 {
4237 return GL_FALSE;
4238 }
4239
4240 return GL_TRUE;
4241 }
4242
4243 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
4244 {
4245 if( GL_FALSE == checkop2(pAsm) )
4246 {
4247 return GL_FALSE;
4248 }
4249
4250 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4251
4252 if( GL_FALSE == assemble_dst(pAsm) )
4253 {
4254 return GL_FALSE;
4255 }
4256
4257 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4258 {
4259 return GL_FALSE;
4260 }
4261
4262 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4263 {
4264 return GL_FALSE;
4265 }
4266
4267 if( GL_FALSE == next_ins(pAsm) )
4268 {
4269 return GL_FALSE;
4270 }
4271
4272 return GL_TRUE;
4273 }
4274
4275 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
4276 {
4277 BITS tmp;
4278
4279 checkop1(pAsm);
4280
4281 tmp = gethelpr(pAsm);
4282
4283 // LG2 tmp.x, a.swizzle
4284 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
4285 pAsm->D.dst.math = 1;
4286
4287 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4288 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4289 pAsm->D.dst.reg = tmp;
4290 nomask_PVSDST(&(pAsm->D.dst));
4291
4292 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4293 {
4294 return GL_FALSE;
4295 }
4296
4297 if( GL_FALSE == next_ins(pAsm) )
4298 {
4299 return GL_FALSE;
4300 }
4301
4302 // MUL tmp.x, tmp.x, b.swizzle
4303 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4304
4305 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4306 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4307 pAsm->D.dst.reg = tmp;
4308 nomask_PVSDST(&(pAsm->D.dst));
4309
4310 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4311 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4312 pAsm->S[0].src.reg = tmp;
4313 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4314 noneg_PVSSRC(&(pAsm->S[0].src));
4315
4316 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4317 {
4318 return GL_FALSE;
4319 }
4320
4321 if( GL_FALSE == next_ins(pAsm) )
4322 {
4323 return GL_FALSE;
4324 }
4325
4326 // EX2 dst.mask, tmp.x
4327 // EX2 tmp.x, tmp.x
4328 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
4329 pAsm->D.dst.math = 1;
4330
4331 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4332 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4333 pAsm->D.dst.reg = tmp;
4334 nomask_PVSDST(&(pAsm->D.dst));
4335
4336 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4337 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4338 pAsm->S[0].src.reg = tmp;
4339 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4340 noneg_PVSSRC(&(pAsm->S[0].src));
4341
4342 if( GL_FALSE == next_ins(pAsm) )
4343 {
4344 return GL_FALSE;
4345 }
4346
4347 // Now replicate result to all necessary channels in destination
4348 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4349
4350 if( GL_FALSE == assemble_dst(pAsm) )
4351 {
4352 return GL_FALSE;
4353 }
4354
4355 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4356 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4357 pAsm->S[0].src.reg = tmp;
4358
4359 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
4360 noneg_PVSSRC(&(pAsm->S[0].src));
4361
4362 if( GL_FALSE == next_ins(pAsm) )
4363 {
4364 return GL_FALSE;
4365 }
4366
4367 return GL_TRUE;
4368 }
4369
4370 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
4371 {
4372 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
4373 }
4374
4375 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
4376 {
4377 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
4378 }
4379
4380 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
4381 {
4382 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
4383 }
4384
4385 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
4386 {
4387 BITS tmp;
4388
4389 checkop1(pAsm);
4390
4391 tmp = gethelpr(pAsm);
4392
4393 // COS tmp.x, a.x
4394 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
4395 pAsm->D.dst.math = 1;
4396
4397 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4398 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4399 pAsm->D.dst.reg = tmp;
4400 pAsm->D.dst.writex = 1;
4401
4402 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4403 {
4404 return GL_FALSE;
4405 }
4406
4407 if ( GL_FALSE == next_ins(pAsm) )
4408 {
4409 return GL_FALSE;
4410 }
4411
4412 // SIN tmp.y, a.x
4413 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
4414 pAsm->D.dst.math = 1;
4415
4416 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4417 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4418 pAsm->D.dst.reg = tmp;
4419 pAsm->D.dst.writey = 1;
4420
4421 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4422 {
4423 return GL_FALSE;
4424 }
4425
4426 if( GL_FALSE == next_ins(pAsm) )
4427 {
4428 return GL_FALSE;
4429 }
4430
4431 // MOV dst.mask, tmp
4432 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4433
4434 if( GL_FALSE == assemble_dst(pAsm) )
4435 {
4436 return GL_FALSE;
4437 }
4438
4439 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4440 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4441 pAsm->S[0].src.reg = tmp;
4442
4443 noswizzle_PVSSRC(&(pAsm->S[0].src));
4444 pAsm->S[0].src.swizzlez = SQ_SEL_0;
4445 pAsm->S[0].src.swizzlew = SQ_SEL_0;
4446
4447 if ( GL_FALSE == next_ins(pAsm) )
4448 {
4449 return GL_FALSE;
4450 }
4451
4452 return GL_TRUE;
4453 }
4454
4455 GLboolean assemble_LOGIC(r700_AssemblerBase *pAsm, BITS opcode)
4456 {
4457 if( GL_FALSE == checkop2(pAsm) )
4458 {
4459 return GL_FALSE;
4460 }
4461
4462 pAsm->D.dst.opcode = opcode;
4463 pAsm->D.dst.math = 1;
4464
4465 if( GL_FALSE == assemble_dst(pAsm) )
4466 {
4467 return GL_FALSE;
4468 }
4469
4470 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4471 {
4472 return GL_FALSE;
4473 }
4474
4475 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4476 {
4477 return GL_FALSE;
4478 }
4479
4480 if( GL_FALSE == next_ins(pAsm) )
4481 {
4482 return GL_FALSE;
4483 }
4484
4485 return GL_TRUE;
4486 }
4487
4488 GLboolean assemble_LOGIC_PRED(r700_AssemblerBase *pAsm, BITS opcode)
4489 {
4490 if( GL_FALSE == checkop2(pAsm) )
4491 {
4492 return GL_FALSE;
4493 }
4494
4495 pAsm->D.dst.opcode = opcode;
4496 pAsm->D.dst.math = 1;
4497 pAsm->D.dst.predicated = 1;
4498 pAsm->D2.dst2.SaturateMode = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
4499
4500 if( GL_FALSE == assemble_dst(pAsm) )
4501 {
4502 return GL_FALSE;
4503 }
4504
4505 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4506 {
4507 return GL_FALSE;
4508 }
4509
4510 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4511 {
4512 return GL_FALSE;
4513 }
4514
4515 if( GL_FALSE == next_ins2(pAsm) )
4516 {
4517 return GL_FALSE;
4518 }
4519
4520 return GL_TRUE;
4521 }
4522
4523 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
4524 {
4525 if( GL_FALSE == checkop2(pAsm) )
4526 {
4527 return GL_FALSE;
4528 }
4529
4530 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
4531
4532 if( GL_FALSE == assemble_dst(pAsm) )
4533 {
4534 return GL_FALSE;
4535 }
4536
4537 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4538 {
4539 return GL_FALSE;
4540 }
4541
4542 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4543 {
4544 return GL_FALSE;
4545 }
4546
4547 if( GL_FALSE == next_ins(pAsm) )
4548 {
4549 return GL_FALSE;
4550 }
4551
4552 return GL_TRUE;
4553 }
4554
4555 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
4556 {
4557 if( GL_FALSE == checkop2(pAsm) )
4558 {
4559 return GL_FALSE;
4560 }
4561
4562 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
4563
4564 if( GL_FALSE == assemble_dst(pAsm) )
4565 {
4566 return GL_FALSE;
4567 }
4568
4569 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4570 {
4571 return GL_FALSE;
4572 }
4573
4574 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
4575 {
4576 return GL_FALSE;
4577 }
4578
4579 if( GL_FALSE == next_ins(pAsm) )
4580 {
4581 return GL_FALSE;
4582 }
4583
4584 return GL_TRUE;
4585 }
4586
4587 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
4588 {
4589 return GL_TRUE;
4590 }
4591
4592 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
4593 {
4594 GLboolean src_const;
4595 GLboolean need_barrier = GL_FALSE;
4596
4597 checkop1(pAsm);
4598
4599 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
4600 {
4601 case PROGRAM_CONSTANT:
4602 case PROGRAM_LOCAL_PARAM:
4603 case PROGRAM_ENV_PARAM:
4604 case PROGRAM_STATE_VAR:
4605 src_const = GL_TRUE;
4606 break;
4607 case PROGRAM_TEMPORARY:
4608 case PROGRAM_INPUT:
4609 default:
4610 src_const = GL_FALSE;
4611 break;
4612 }
4613
4614 if (GL_TRUE == src_const)
4615 {
4616 if ( GL_FALSE == mov_temp(pAsm, 0) )
4617 return GL_FALSE;
4618 need_barrier = GL_TRUE;
4619 }
4620
4621 switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
4622 {
4623 case OPCODE_TEX:
4624 break;
4625 case OPCODE_TXB:
4626 radeon_error("do not support TXB yet\n");
4627 return GL_FALSE;
4628 break;
4629 case OPCODE_TXP:
4630 break;
4631 default:
4632 radeon_error("Internal error: bad texture op (not TEX)\n");
4633 return GL_FALSE;
4634 break;
4635 }
4636
4637 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4638 {
4639 GLuint tmp = gethelpr(pAsm);
4640 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4641 pAsm->D.dst.math = 1;
4642 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4643 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4644 pAsm->D.dst.reg = tmp;
4645 pAsm->D.dst.writew = 1;
4646
4647 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4648 {
4649 return GL_FALSE;
4650 }
4651 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
4652 if( GL_FALSE == next_ins(pAsm) )
4653 {
4654 return GL_FALSE;
4655 }
4656
4657 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4658 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4659 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4660 pAsm->D.dst.reg = tmp;
4661 pAsm->D.dst.writex = 1;
4662 pAsm->D.dst.writey = 1;
4663 pAsm->D.dst.writez = 1;
4664 pAsm->D.dst.writew = 0;
4665
4666 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4667 {
4668 return GL_FALSE;
4669 }
4670 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4671 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4672 pAsm->S[1].src.reg = tmp;
4673 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
4674
4675 if( GL_FALSE == next_ins(pAsm) )
4676 {
4677 return GL_FALSE;
4678 }
4679
4680 pAsm->aArgSubst[1] = tmp;
4681 need_barrier = GL_TRUE;
4682 }
4683
4684 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
4685 {
4686 GLuint tmp1 = gethelpr(pAsm);
4687 GLuint tmp2 = gethelpr(pAsm);
4688
4689 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
4690 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
4691 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4692 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4693 pAsm->D.dst.reg = tmp1;
4694 nomask_PVSDST(&(pAsm->D.dst));
4695
4696 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4697 {
4698 return GL_FALSE;
4699 }
4700
4701 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
4702 {
4703 return GL_FALSE;
4704 }
4705
4706 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
4707 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
4708
4709 if( GL_FALSE == next_ins(pAsm) )
4710 {
4711 return GL_FALSE;
4712 }
4713
4714 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
4715 * have to do explicit instruction
4716 */
4717 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
4718 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4719 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4720 pAsm->D.dst.reg = tmp1;
4721 pAsm->D.dst.writez = 1;
4722
4723 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4724 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4725 pAsm->S[0].src.reg = tmp1;
4726 noswizzle_PVSSRC(&(pAsm->S[0].src));
4727 pAsm->S[1].bits = pAsm->S[0].bits;
4728 flipneg_PVSSRC(&(pAsm->S[1].src));
4729
4730 next_ins(pAsm);
4731
4732 /* tmp1.z = RCP_e(|tmp1.z|) */
4733 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
4734 pAsm->D.dst.math = 1;
4735 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4736 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4737 pAsm->D.dst.reg = tmp1;
4738 pAsm->D.dst.writez = 1;
4739
4740 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4741 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4742 pAsm->S[0].src.reg = tmp1;
4743 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
4744
4745 next_ins(pAsm);
4746
4747 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
4748 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
4749 * muladd has no writemask, have to use another temp
4750 * also no support for imm constants, so add 1 here
4751 */
4752 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4753 pAsm->D.dst.op3 = 1;
4754 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4755 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4756 pAsm->D.dst.reg = tmp2;
4757
4758 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4759 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4760 pAsm->S[0].src.reg = tmp1;
4761 noswizzle_PVSSRC(&(pAsm->S[0].src));
4762 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4763 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4764 pAsm->S[1].src.reg = tmp1;
4765 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
4766 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
4767 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4768 pAsm->S[2].src.reg = tmp1;
4769 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
4770
4771 next_ins(pAsm);
4772
4773 /* ADD the remaining .5 */
4774 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
4775 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4776 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4777 pAsm->D.dst.reg = tmp2;
4778 pAsm->D.dst.writex = 1;
4779 pAsm->D.dst.writey = 1;
4780 pAsm->D.dst.writez = 0;
4781 pAsm->D.dst.writew = 0;
4782
4783 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4784 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4785 pAsm->S[0].src.reg = tmp2;
4786 noswizzle_PVSSRC(&(pAsm->S[0].src));
4787 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
4788 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4789 pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
4790 noswizzle_PVSSRC(&(pAsm->S[1].src));
4791
4792 next_ins(pAsm);
4793
4794 /* tmp1.xy = temp2.xy */
4795 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4796 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4797 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4798 pAsm->D.dst.reg = tmp1;
4799 pAsm->D.dst.writex = 1;
4800 pAsm->D.dst.writey = 1;
4801 pAsm->D.dst.writez = 0;
4802 pAsm->D.dst.writew = 0;
4803
4804 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4805 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4806 pAsm->S[0].src.reg = tmp2;
4807 noswizzle_PVSSRC(&(pAsm->S[0].src));
4808
4809 next_ins(pAsm);
4810 pAsm->aArgSubst[1] = tmp1;
4811 need_barrier = GL_TRUE;
4812
4813 }
4814
4815 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
4816 pAsm->is_tex = GL_TRUE;
4817 if ( GL_TRUE == need_barrier )
4818 {
4819 pAsm->need_tex_barrier = GL_TRUE;
4820 }
4821 // Set src1 to tex unit id
4822 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
4823 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
4824
4825 //No sw info from mesa compiler, so hard code here.
4826 pAsm->S[1].src.swizzlex = SQ_SEL_X;
4827 pAsm->S[1].src.swizzley = SQ_SEL_Y;
4828 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
4829 pAsm->S[1].src.swizzlew = SQ_SEL_W;
4830
4831 if( GL_FALSE == tex_dst(pAsm) )
4832 {
4833 return GL_FALSE;
4834 }
4835
4836 if( GL_FALSE == tex_src(pAsm) )
4837 {
4838 return GL_FALSE;
4839 }
4840
4841 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
4842 {
4843 /* hopefully did swizzles before */
4844 noswizzle_PVSSRC(&(pAsm->S[0].src));
4845 }
4846
4847 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
4848 {
4849 /* SAMPLE dst, tmp.yxwy, CUBE */
4850 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
4851 pAsm->S[0].src.swizzley = SQ_SEL_X;
4852 pAsm->S[0].src.swizzlez = SQ_SEL_W;
4853 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
4854 }
4855
4856 if ( GL_FALSE == next_ins(pAsm) )
4857 {
4858 return GL_FALSE;
4859 }
4860
4861 return GL_TRUE;
4862 }
4863
4864 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
4865 {
4866 BITS tmp;
4867
4868 if( GL_FALSE == checkop2(pAsm) )
4869 {
4870 return GL_FALSE;
4871 }
4872
4873 tmp = gethelpr(pAsm);
4874
4875 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
4876
4877 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4878 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4879 pAsm->D.dst.reg = tmp;
4880 nomask_PVSDST(&(pAsm->D.dst));
4881
4882 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4883 {
4884 return GL_FALSE;
4885 }
4886
4887 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4888 {
4889 return GL_FALSE;
4890 }
4891
4892 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4893 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4894
4895 if( GL_FALSE == next_ins(pAsm) )
4896 {
4897 return GL_FALSE;
4898 }
4899
4900 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
4901 pAsm->D.dst.op3 = 1;
4902
4903 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4904 {
4905 tmp = gethelpr(pAsm);
4906
4907 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4908 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4909 pAsm->D.dst.reg = tmp;
4910
4911 nomask_PVSDST(&(pAsm->D.dst));
4912 }
4913 else
4914 {
4915 if( GL_FALSE == assemble_dst(pAsm) )
4916 {
4917 return GL_FALSE;
4918 }
4919 }
4920
4921 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
4922 {
4923 return GL_FALSE;
4924 }
4925
4926 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
4927 {
4928 return GL_FALSE;
4929 }
4930
4931 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
4932 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
4933
4934 // result1 + (neg) result0
4935 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
4936 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
4937 pAsm->S[2].src.reg = tmp;
4938
4939 neg_PVSSRC(&(pAsm->S[2].src));
4940 noswizzle_PVSSRC(&(pAsm->S[2].src));
4941
4942 if( GL_FALSE == next_ins(pAsm) )
4943 {
4944 return GL_FALSE;
4945 }
4946
4947
4948 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
4949 {
4950 if( GL_FALSE == assemble_dst(pAsm) )
4951 {
4952 return GL_FALSE;
4953 }
4954
4955 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4956
4957 // Use tmp as source
4958 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4959 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
4960 pAsm->S[0].src.reg = tmp;
4961
4962 noneg_PVSSRC(&(pAsm->S[0].src));
4963 noswizzle_PVSSRC(&(pAsm->S[0].src));
4964
4965 if( GL_FALSE == next_ins(pAsm) )
4966 {
4967 return GL_FALSE;
4968 }
4969 }
4970
4971 return GL_TRUE;
4972 }
4973
4974 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
4975 {
4976 return GL_TRUE;
4977 }
4978
4979 static inline void decreaseCurrent(r700_AssemblerBase *pAsm, GLuint uReason)
4980 {
4981 switch (uReason)
4982 {
4983 case FC_PUSH_VPM:
4984 pAsm->CALLSTACK[pAsm->CALLSP].current--;
4985 break;
4986 case FC_PUSH_WQM:
4987 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4988 break;
4989 case FC_LOOP:
4990 pAsm->CALLSTACK[pAsm->CALLSP].current -= 4;
4991 break;
4992 case FC_REP:
4993 /* TODO : for 16 vp asic, should -= 2; */
4994 pAsm->CALLSTACK[pAsm->CALLSP].current -= 1;
4995 break;
4996 };
4997 }
4998
4999 static inline void checkStackDepth(r700_AssemblerBase *pAsm, GLuint uReason, GLboolean bCheckMaxOnly)
5000 {
5001 if(GL_TRUE == bCheckMaxOnly)
5002 {
5003 switch (uReason)
5004 {
5005 case FC_PUSH_VPM:
5006 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 1)
5007 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5008 {
5009 pAsm->CALLSTACK[pAsm->CALLSP].max =
5010 pAsm->CALLSTACK[pAsm->CALLSP].current + 1;
5011 }
5012 break;
5013 case FC_PUSH_WQM:
5014 if((pAsm->CALLSTACK[pAsm->CALLSP].current + 4)
5015 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5016 {
5017 pAsm->CALLSTACK[pAsm->CALLSP].max =
5018 pAsm->CALLSTACK[pAsm->CALLSP].current + 4;
5019 }
5020 break;
5021 }
5022 return;
5023 }
5024
5025 switch (uReason)
5026 {
5027 case FC_PUSH_VPM:
5028 pAsm->CALLSTACK[pAsm->CALLSP].current++;
5029 break;
5030 case FC_PUSH_WQM:
5031 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5032 break;
5033 case FC_LOOP:
5034 pAsm->CALLSTACK[pAsm->CALLSP].current += 4;
5035 break;
5036 case FC_REP:
5037 /* TODO : for 16 vp asic, should += 2; */
5038 pAsm->CALLSTACK[pAsm->CALLSP].current += 1;
5039 break;
5040 };
5041
5042 if(pAsm->CALLSTACK[pAsm->CALLSP].current
5043 > pAsm->CALLSTACK[pAsm->CALLSP].max)
5044 {
5045 pAsm->CALLSTACK[pAsm->CALLSP].max =
5046 pAsm->CALLSTACK[pAsm->CALLSP].current;
5047 }
5048 }
5049
5050 GLboolean jumpToOffest(r700_AssemblerBase *pAsm, GLuint pops, GLint offset)
5051 {
5052 if(GL_FALSE == add_cf_instruction(pAsm) )
5053 {
5054 return GL_FALSE;
5055 }
5056
5057 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5058 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5059 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5060
5061 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5062 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5063 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5064 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5065
5066 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5067
5068 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + offset;
5069
5070 return GL_TRUE;
5071 }
5072
5073 GLboolean pops(r700_AssemblerBase *pAsm, GLuint pops)
5074 {
5075 if(GL_FALSE == add_cf_instruction(pAsm) )
5076 {
5077 return GL_FALSE;
5078 }
5079
5080 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = pops;
5081 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5082 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5083
5084 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5085 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5086 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5087
5088 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5089
5090 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5091 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5092
5093 return GL_TRUE;
5094 }
5095
5096 GLboolean assemble_IF(r700_AssemblerBase *pAsm, GLboolean bHasElse)
5097 {
5098 if(GL_FALSE == add_cf_instruction(pAsm) )
5099 {
5100 return GL_FALSE;
5101 }
5102
5103 if(GL_TRUE != bHasElse)
5104 {
5105 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5106 }
5107 else
5108 {
5109 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5110 }
5111 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5112 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5113
5114 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5115 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5116 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_JUMP;
5117 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5118
5119 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5120
5121 pAsm->FCSP++;
5122 pAsm->fc_stack[pAsm->FCSP].type = FC_IF;
5123 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5124 pAsm->fc_stack[pAsm->FCSP].midLen= 0;
5125 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5126
5127 #ifndef USE_CF_FOR_POP_AFTER
5128 if(GL_TRUE != bHasElse)
5129 {
5130 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5131 }
5132 #endif /* USE_CF_FOR_POP_AFTER */
5133
5134 checkStackDepth(pAsm, FC_PUSH_VPM, GL_FALSE);
5135
5136 return GL_TRUE;
5137 }
5138
5139 GLboolean assemble_ELSE(r700_AssemblerBase *pAsm)
5140 {
5141 if(GL_FALSE == add_cf_instruction(pAsm) )
5142 {
5143 return GL_FALSE;
5144 }
5145
5146 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1; ///
5147 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5148 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5149
5150 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5151 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5152 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ELSE;
5153 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5154
5155 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5156
5157 pAsm->fc_stack[pAsm->FCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc( (void *)pAsm->fc_stack[pAsm->FCSP].mid,
5158 0,
5159 sizeof(R700ControlFlowGenericClause *) );
5160 pAsm->fc_stack[pAsm->FCSP].mid[0] = pAsm->cf_current_cf_clause_ptr;
5161 //pAsm->fc_stack[pAsm->FCSP].unNumMid = 1;
5162
5163 #ifndef USE_CF_FOR_POP_AFTER
5164 pAsm->alu_x_opcode = SQ_CF_INST_ALU_POP_AFTER;
5165 #endif /* USE_CF_FOR_POP_AFTER */
5166
5167 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode - 1;
5168
5169 return GL_TRUE;
5170 }
5171
5172 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
5173 {
5174 #ifdef USE_CF_FOR_POP_AFTER
5175 pops(pAsm, 1);
5176 #endif /* USE_CF_FOR_POP_AFTER */
5177
5178 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5179
5180 if(NULL == pAsm->fc_stack[pAsm->FCSP].mid)
5181 {
5182 /* no else in between */
5183 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5184 }
5185 else
5186 {
5187 pAsm->fc_stack[pAsm->FCSP].mid[0]->m_Word0.f.addr = pAsm->pR700Shader->plstCFInstructions_active->uNumOfNode;
5188 }
5189
5190 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5191 {
5192 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5193 }
5194
5195 if(pAsm->fc_stack[pAsm->FCSP].type != FC_IF)
5196 {
5197 radeon_error("if/endif in shader code are not paired. \n");
5198 return GL_FALSE;
5199 }
5200
5201 pAsm->FCSP--;
5202
5203 decreaseCurrent(pAsm, FC_PUSH_VPM);
5204
5205 return GL_TRUE;
5206 }
5207
5208 GLboolean assemble_BGNLOOP(r700_AssemblerBase *pAsm)
5209 {
5210 if(GL_FALSE == add_cf_instruction(pAsm) )
5211 {
5212 return GL_FALSE;
5213 }
5214
5215
5216 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5217 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5218 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5219
5220 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5221 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5222 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_START_NO_AL;
5223 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5224
5225 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5226
5227 pAsm->FCSP++;
5228 pAsm->fc_stack[pAsm->FCSP].type = FC_LOOP;
5229 pAsm->fc_stack[pAsm->FCSP].mid = NULL;
5230 pAsm->fc_stack[pAsm->FCSP].unNumMid = 0;
5231 pAsm->fc_stack[pAsm->FCSP].midLen = 0;
5232 pAsm->fc_stack[pAsm->FCSP].first = pAsm->cf_current_cf_clause_ptr;
5233
5234 checkStackDepth(pAsm, FC_LOOP, GL_FALSE);
5235
5236 return GL_TRUE;
5237 }
5238
5239 GLboolean assemble_BRK(r700_AssemblerBase *pAsm)
5240 {
5241 #ifdef USE_CF_FOR_CONTINUE_BREAK
5242 unsigned int unFCSP;
5243 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5244 {
5245 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5246 {
5247 break;
5248 }
5249 }
5250 if(0 == FC_LOOP)
5251 {
5252 radeon_error("Break is not inside loop/endloop pair.\n");
5253 return GL_FALSE;
5254 }
5255
5256 if(GL_FALSE == add_cf_instruction(pAsm) )
5257 {
5258 return GL_FALSE;
5259 }
5260
5261
5262 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5263 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5264 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5265
5266 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5267 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5268 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5269
5270 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5271
5272 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5273
5274 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5275 (void *)pAsm->fc_stack[unFCSP].mid,
5276 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5277 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5278 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5279 pAsm->fc_stack[unFCSP].unNumMid++;
5280
5281 if(GL_FALSE == add_cf_instruction(pAsm) )
5282 {
5283 return GL_FALSE;
5284 }
5285
5286 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5287 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5288 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5289
5290 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5291 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5292 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5293
5294 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5295
5296 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5297 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5298
5299 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5300
5301 #endif //USE_CF_FOR_CONTINUE_BREAK
5302 return GL_TRUE;
5303 }
5304
5305 GLboolean assemble_CONT(r700_AssemblerBase *pAsm)
5306 {
5307 #ifdef USE_CF_FOR_CONTINUE_BREAK
5308 unsigned int unFCSP;
5309 for(unFCSP=pAsm->FCSP; unFCSP>0; unFCSP--)
5310 {
5311 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5312 {
5313 break;
5314 }
5315 }
5316 if(0 == FC_LOOP)
5317 {
5318 radeon_error("Continue is not inside loop/endloop pair.\n");
5319 return GL_FALSE;
5320 }
5321
5322 if(GL_FALSE == add_cf_instruction(pAsm) )
5323 {
5324 return GL_FALSE;
5325 }
5326
5327
5328 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5329 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5330 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5331
5332 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5333 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5334 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_CONTINUE;
5335
5336 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5337
5338 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5339
5340 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5341 (void *)pAsm->fc_stack[unFCSP].mid,
5342 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5343 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5344 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5345 pAsm->fc_stack[unFCSP].unNumMid++;
5346
5347 if(GL_FALSE == add_cf_instruction(pAsm) )
5348 {
5349 return GL_FALSE;
5350 }
5351
5352 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5353 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5354 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5355
5356 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5357 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5358 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_POP;
5359
5360 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5361
5362 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5363 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5364
5365 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5366
5367 #endif /* USE_CF_FOR_CONTINUE_BREAK */
5368
5369 return GL_TRUE;
5370 }
5371
5372 GLboolean assemble_ENDLOOP(r700_AssemblerBase *pAsm)
5373 {
5374 GLuint i;
5375
5376 if(GL_FALSE == add_cf_instruction(pAsm) )
5377 {
5378 return GL_FALSE;
5379 }
5380
5381
5382 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5383 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5384 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5385
5386 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5387 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5388 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_END;
5389 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5390
5391 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5392
5393 pAsm->cf_current_cf_clause_ptr->m_Word0.f.addr = pAsm->fc_stack[pAsm->FCSP].first->m_uIndex + 1;
5394 pAsm->fc_stack[pAsm->FCSP].first->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex + 1;
5395
5396 #ifdef USE_CF_FOR_CONTINUE_BREAK
5397 for(i=0; i<pAsm->fc_stack[pAsm->FCSP].unNumMid; i++)
5398 {
5399 pAsm->fc_stack[pAsm->FCSP].mid[i]->m_Word0.f.addr = pAsm->cf_current_cf_clause_ptr->m_uIndex;
5400 }
5401 if(NULL != pAsm->fc_stack[pAsm->FCSP].mid)
5402 {
5403 FREE(pAsm->fc_stack[pAsm->FCSP].mid);
5404 }
5405 #endif
5406
5407 if(pAsm->fc_stack[pAsm->FCSP].type != FC_LOOP)
5408 {
5409 radeon_error("loop/endloop in shader code are not paired. \n");
5410 return GL_FALSE;
5411 }
5412
5413 GLuint unFCSP;
5414 GLuint unIF = 0;
5415 if((pAsm->unCFflags & HAS_CURRENT_LOOPRET) > 0)
5416 {
5417 for(unFCSP=(pAsm->FCSP-1); unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5418 {
5419 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5420 {
5421 breakLoopOnFlag(pAsm, unFCSP);
5422 break;
5423 }
5424 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5425 {
5426 unIF++;
5427 }
5428 }
5429 if(unFCSP <= pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry)
5430 {
5431 #ifdef USE_CF_FOR_POP_AFTER
5432 returnOnFlag(pAsm, unIF);
5433 #else
5434 returnOnFlag(pAsm, 0);
5435 #endif /* USE_CF_FOR_POP_AFTER */
5436 pAsm->unCFflags &= ~HAS_CURRENT_LOOPRET;
5437 }
5438 }
5439
5440 pAsm->FCSP--;
5441
5442 decreaseCurrent(pAsm, FC_LOOP);
5443
5444 return GL_TRUE;
5445 }
5446
5447 void add_return_inst(r700_AssemblerBase *pAsm)
5448 {
5449 if(GL_FALSE == add_cf_instruction(pAsm) )
5450 {
5451 return GL_FALSE;
5452 }
5453 //pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5454 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5455 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5456 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5457
5458 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5459 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5460 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_RETURN;
5461 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5462
5463 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5464 }
5465
5466 GLboolean assemble_BGNSUB(r700_AssemblerBase *pAsm, GLint nILindex)
5467 {
5468 /* Put in sub */
5469 if( (pAsm->unSubArrayPointer + 1) > pAsm->unSubArraySize )
5470 {
5471 pAsm->subs = (SUB_OFFSET*)_mesa_realloc( (void *)pAsm->subs,
5472 sizeof(SUB_OFFSET) * pAsm->unSubArraySize,
5473 sizeof(SUB_OFFSET) * (pAsm->unSubArraySize + 10) );
5474 if(NULL == pAsm->subs)
5475 {
5476 return GL_FALSE;
5477 }
5478 pAsm->unSubArraySize += 10;
5479 }
5480
5481 pAsm->subs[pAsm->unSubArrayPointer].subIL_Offset = nILindex;
5482 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pHead=NULL;
5483 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.pTail=NULL;
5484 pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local.uNumOfNode=0;
5485
5486 pAsm->CALLSP++;
5487 pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex = pAsm->unSubArrayPointer;
5488 pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry = pAsm->FCSP;
5489 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local
5490 = &(pAsm->subs[pAsm->unSubArrayPointer].lstCFInstructions_local);
5491 pAsm->CALLSTACK[pAsm->CALLSP].max = 0;
5492 pAsm->CALLSTACK[pAsm->CALLSP].current = 0;
5493 SetActiveCFlist(pAsm->pR700Shader,
5494 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5495
5496 pAsm->unSubArrayPointer++;
5497
5498 /* start sub */
5499 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5500
5501 pAsm->FCSP++;
5502 pAsm->fc_stack[pAsm->FCSP].type = FC_REP;
5503
5504 checkStackDepth(pAsm, FC_REP, GL_FALSE);
5505
5506 return GL_TRUE;
5507 }
5508
5509 GLboolean assemble_ENDSUB(r700_AssemblerBase *pAsm)
5510 {
5511 if(pAsm->fc_stack[pAsm->FCSP].type != FC_REP)
5512 {
5513 radeon_error("BGNSUB/ENDSUB in shader code are not paired. \n");
5514 return GL_FALSE;
5515 }
5516
5517 /* copy max to sub structure */
5518 pAsm->subs[pAsm->CALLSTACK[pAsm->CALLSP].subDescIndex].unStackDepthMax
5519 = pAsm->CALLSTACK[pAsm->CALLSP].max;
5520
5521 decreaseCurrent(pAsm, FC_REP);
5522
5523 pAsm->CALLSP--;
5524 SetActiveCFlist(pAsm->pR700Shader,
5525 pAsm->CALLSTACK[pAsm->CALLSP].plstCFInstructions_local);
5526
5527 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5528
5529 pAsm->FCSP--;
5530
5531 return GL_TRUE;
5532 }
5533
5534 GLboolean assemble_RET(r700_AssemblerBase *pAsm)
5535 {
5536 GLuint unIF = 0;
5537
5538 if(pAsm->CALLSP > 0)
5539 { /* in sub */
5540 GLuint unFCSP;
5541 for(unFCSP=pAsm->FCSP; unFCSP>pAsm->CALLSTACK[pAsm->CALLSP].FCSP_BeforeEntry; unFCSP--)
5542 {
5543 if(FC_LOOP == pAsm->fc_stack[unFCSP].type)
5544 {
5545 setRetInLoopFlag(pAsm, SQ_SEL_1);
5546 breakLoopOnFlag(pAsm, unFCSP);
5547 pAsm->unCFflags |= LOOPRET_FLAGS;
5548
5549 return GL_TRUE;
5550 }
5551 else if(FC_IF == pAsm->fc_stack[unFCSP].type)
5552 {
5553 unIF++;
5554 }
5555 }
5556 }
5557
5558 #ifdef USE_CF_FOR_POP_AFTER
5559 if(unIF > 0)
5560 {
5561 pops(pAsm, unIF);
5562 }
5563 #endif /* USE_CF_FOR_POP_AFTER */
5564
5565 add_return_inst(pAsm);
5566
5567 return GL_TRUE;
5568 }
5569
5570 GLboolean assemble_CAL(r700_AssemblerBase *pAsm,
5571 GLint nILindex,
5572 GLuint uiNumberInsts,
5573 struct prog_instruction *pILInst)
5574 {
5575 pAsm->alu_x_opcode = SQ_CF_INST_ALU;
5576
5577 if(GL_FALSE == add_cf_instruction(pAsm) )
5578 {
5579 return GL_FALSE;
5580 }
5581
5582 pAsm->cf_current_cf_clause_ptr->m_Word1.f.call_count = 1;
5583 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 0;
5584 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5585 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5586
5587 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5588 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5589 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_CALL;
5590 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5591
5592 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5593
5594 /* Put in caller */
5595 if( (pAsm->unCallerArrayPointer + 1) > pAsm->unCallerArraySize )
5596 {
5597 pAsm->callers = (CALLER_POINTER*)_mesa_realloc( (void *)pAsm->callers,
5598 sizeof(CALLER_POINTER) * pAsm->unCallerArraySize,
5599 sizeof(CALLER_POINTER) * (pAsm->unCallerArraySize + 10) );
5600 if(NULL == pAsm->callers)
5601 {
5602 return GL_FALSE;
5603 }
5604 pAsm->unCallerArraySize += 10;
5605 }
5606
5607 pAsm->callers[pAsm->unCallerArrayPointer].subIL_Offset = nILindex;
5608 pAsm->callers[pAsm->unCallerArrayPointer].cf_ptr = pAsm->cf_current_cf_clause_ptr;
5609
5610 pAsm->unCallerArrayPointer++;
5611
5612 int j;
5613 GLuint max;
5614 GLuint unSubID;
5615 GLboolean bRet;
5616 for(j=0; j<pAsm->unSubArrayPointer; j++)
5617 {
5618 if(nILindex == pAsm->subs[j].subIL_Offset)
5619 { /* compiled before */
5620
5621 max = pAsm->subs[j].unStackDepthMax
5622 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5623 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5624 {
5625 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5626 }
5627
5628 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = j;
5629 return GL_TRUE;
5630 }
5631 }
5632
5633 pAsm->callers[pAsm->unCallerArrayPointer - 1].subDescIndex = pAsm->unSubArrayPointer;
5634 unSubID = pAsm->unSubArrayPointer;
5635
5636 bRet = AssembleInstr(nILindex, uiNumberInsts, pILInst, pAsm);
5637
5638 if(GL_TRUE == bRet)
5639 {
5640 max = pAsm->subs[unSubID].unStackDepthMax
5641 + pAsm->CALLSTACK[pAsm->CALLSP].current;
5642 if(max > pAsm->CALLSTACK[pAsm->CALLSP].max)
5643 {
5644 pAsm->CALLSTACK[pAsm->CALLSP].max = max;
5645 }
5646 }
5647
5648 return bRet;
5649 }
5650
5651 GLboolean setRetInLoopFlag(r700_AssemblerBase *pAsm, GLuint flagValue)
5652 {
5653 GLfloat fLiteral[2] = {0.1, 0.0};
5654
5655 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
5656 pAsm->D.dst.op3 = 0;
5657 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5658 pAsm->D.dst.reg = pAsm->flag_reg_index;
5659 pAsm->D.dst.writex = 1;
5660 pAsm->D.dst.writey = 0;
5661 pAsm->D.dst.writez = 0;
5662 pAsm->D.dst.writew = 0;
5663 pAsm->D2.dst2.literal = 1;
5664 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5665 pAsm->D.dst.predicated = 0;
5666 #if 0
5667 pAsm->S[0].src.rtype = SRC_REC_LITERAL;
5668 //pAsm->S[0].src.reg = 0;
5669 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5670 noneg_PVSSRC(&(pAsm->S[0].src));
5671 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5672 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5673 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5674 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5675
5676 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5677 {
5678 return GL_FALSE;
5679 }
5680 #else
5681 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5682 pAsm->S[0].src.reg = 0;
5683 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5684 noneg_PVSSRC(&(pAsm->S[0].src));
5685 pAsm->S[0].src.swizzlex = flagValue;
5686 pAsm->S[0].src.swizzley = flagValue;
5687 pAsm->S[0].src.swizzlez = flagValue;
5688 pAsm->S[0].src.swizzlew = flagValue;
5689
5690 if( GL_FALSE == next_ins2(pAsm) )
5691 {
5692 return GL_FALSE;
5693 }
5694 #endif
5695
5696 return GL_TRUE;
5697 }
5698
5699 GLboolean testFlag(r700_AssemblerBase *pAsm)
5700 {
5701 GLfloat fLiteral[2] = {0.1, 0.0};
5702
5703 //Test flag
5704 GLuint tmp = gethelpr(pAsm);
5705 pAsm->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5706
5707 pAsm->D.dst.opcode = SQ_OP2_INST_PRED_SETE;
5708 pAsm->D.dst.math = 1;
5709 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
5710 pAsm->D.dst.reg = tmp;
5711 pAsm->D.dst.writex = 1;
5712 pAsm->D.dst.writey = 0;
5713 pAsm->D.dst.writez = 0;
5714 pAsm->D.dst.writew = 0;
5715 pAsm->D2.dst2.literal = 1;
5716 pAsm->D2.dst2.SaturateMode = SATURATE_OFF;
5717 pAsm->D.dst.predicated = 1;
5718
5719 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
5720 pAsm->S[0].src.reg = pAsm->flag_reg_index;
5721 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
5722 noneg_PVSSRC(&(pAsm->S[0].src));
5723 pAsm->S[0].src.swizzlex = SQ_SEL_X;
5724 pAsm->S[0].src.swizzley = SQ_SEL_Y;
5725 pAsm->S[0].src.swizzlez = SQ_SEL_Z;
5726 pAsm->S[0].src.swizzlew = SQ_SEL_W;
5727 #if 0
5728 pAsm->S[1].src.rtype = SRC_REC_LITERAL;
5729 //pAsm->S[1].src.reg = 0;
5730 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5731 noneg_PVSSRC(&(pAsm->S[1].src));
5732 pAsm->S[1].src.swizzlex = SQ_SEL_X;
5733 pAsm->S[1].src.swizzley = SQ_SEL_Y;
5734 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
5735 pAsm->S[1].src.swizzlew = SQ_SEL_W;
5736
5737 if( GL_FALSE == next_ins_literal(pAsm, &(fLiteral[0])) )
5738 {
5739 return GL_FALSE;
5740 }
5741 #else
5742 pAsm->S[1].src.rtype = DST_REG_TEMPORARY;
5743 pAsm->S[1].src.reg = 0;
5744 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
5745 noneg_PVSSRC(&(pAsm->S[1].src));
5746 pAsm->S[1].src.swizzlex = SQ_SEL_1;
5747 pAsm->S[1].src.swizzley = SQ_SEL_1;
5748 pAsm->S[1].src.swizzlez = SQ_SEL_1;
5749 pAsm->S[1].src.swizzlew = SQ_SEL_1;
5750
5751 if( GL_FALSE == next_ins2(pAsm) )
5752 {
5753 return GL_FALSE;
5754 }
5755 #endif
5756
5757 checkStackDepth(pAsm, FC_PUSH_VPM, GL_TRUE);
5758
5759 return GL_TRUE;
5760 }
5761
5762 GLboolean returnOnFlag(r700_AssemblerBase *pAsm, GLuint unIF)
5763 {
5764 testFlag(pAsm);
5765 jumpToOffest(pAsm, 1, 4);
5766 setRetInLoopFlag(pAsm, SQ_SEL_0);
5767 pops(pAsm, unIF + 1);
5768 add_return_inst(pAsm);
5769
5770 return GL_TRUE;
5771 }
5772
5773 GLboolean breakLoopOnFlag(r700_AssemblerBase *pAsm, GLuint unFCSP)
5774 {
5775 testFlag(pAsm);
5776
5777 //break
5778 if(GL_FALSE == add_cf_instruction(pAsm) )
5779 {
5780 return GL_FALSE;
5781 }
5782
5783 pAsm->cf_current_cf_clause_ptr->m_Word1.f.pop_count = 1;
5784 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_const = 0x0;
5785 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
5786
5787 pAsm->cf_current_cf_clause_ptr->m_Word1.f.end_of_program = 0x0;
5788 pAsm->cf_current_cf_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
5789 pAsm->cf_current_cf_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_LOOP_BREAK;
5790 pAsm->cf_current_cf_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
5791
5792 pAsm->cf_current_cf_clause_ptr->m_Word1.f.barrier = 0x1;
5793
5794 pAsm->fc_stack[unFCSP].mid = (R700ControlFlowGenericClause **)_mesa_realloc(
5795 (void *)pAsm->fc_stack[unFCSP].mid,
5796 sizeof(R700ControlFlowGenericClause *) * pAsm->fc_stack[unFCSP].unNumMid,
5797 sizeof(R700ControlFlowGenericClause *) * (pAsm->fc_stack[unFCSP].unNumMid + 1) );
5798 pAsm->fc_stack[unFCSP].mid[pAsm->fc_stack[unFCSP].unNumMid] = pAsm->cf_current_cf_clause_ptr;
5799 pAsm->fc_stack[unFCSP].unNumMid++;
5800
5801 pops(pAsm, 1);
5802
5803 return GL_TRUE;
5804 }
5805
5806 GLboolean AssembleInstr(GLuint uiFirstInst,
5807 GLuint uiNumberInsts,
5808 struct prog_instruction *pILInst,
5809 r700_AssemblerBase *pR700AsmCode)
5810 {
5811 GLuint i;
5812
5813 pR700AsmCode->pILInst = pILInst;
5814 for(i=uiFirstInst; i<uiNumberInsts; i++)
5815 {
5816 pR700AsmCode->uiCurInst = i;
5817
5818 #ifndef USE_CF_FOR_CONTINUE_BREAK
5819 if(OPCODE_BRK == pILInst[i+1].Opcode)
5820 {
5821 switch(pILInst[i].Opcode)
5822 {
5823 case OPCODE_SLE:
5824 pILInst[i].Opcode = OPCODE_SGT;
5825 break;
5826 case OPCODE_SLT:
5827 pILInst[i].Opcode = OPCODE_SGE;
5828 break;
5829 case OPCODE_SGE:
5830 pILInst[i].Opcode = OPCODE_SLT;
5831 break;
5832 case OPCODE_SGT:
5833 pILInst[i].Opcode = OPCODE_SLE;
5834 break;
5835 case OPCODE_SEQ:
5836 pILInst[i].Opcode = OPCODE_SNE;
5837 break;
5838 case OPCODE_SNE:
5839 pILInst[i].Opcode = OPCODE_SEQ;
5840 break;
5841 default:
5842 break;
5843 }
5844 }
5845 #endif
5846
5847 switch (pILInst[i].Opcode)
5848 {
5849 case OPCODE_ABS:
5850 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
5851 return GL_FALSE;
5852 break;
5853 case OPCODE_ADD:
5854 case OPCODE_SUB:
5855 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
5856 return GL_FALSE;
5857 break;
5858
5859 case OPCODE_ARL:
5860 if ( GL_FALSE == assemble_ARL(pR700AsmCode) )
5861 return GL_FALSE;
5862 break;
5863 case OPCODE_ARR:
5864 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
5865 //if ( GL_FALSE == assemble_BAD("ARR") )
5866 return GL_FALSE;
5867 break;
5868
5869 case OPCODE_CMP:
5870 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
5871 return GL_FALSE;
5872 break;
5873 case OPCODE_COS:
5874 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
5875 return GL_FALSE;
5876 break;
5877
5878 case OPCODE_DP3:
5879 case OPCODE_DP4:
5880 case OPCODE_DPH:
5881 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
5882 return GL_FALSE;
5883 break;
5884
5885 case OPCODE_DST:
5886 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
5887 return GL_FALSE;
5888 break;
5889
5890 case OPCODE_EX2:
5891 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
5892 return GL_FALSE;
5893 break;
5894 case OPCODE_EXP:
5895 if ( GL_FALSE == assemble_EXP(pR700AsmCode) )
5896 return GL_FALSE;
5897 break;
5898
5899 case OPCODE_FLR:
5900 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
5901 return GL_FALSE;
5902 break;
5903 //case OP_FLR_INT: ;
5904
5905 // if ( GL_FALSE == assemble_FLR_INT() )
5906 // return GL_FALSE;
5907 // break;
5908
5909 case OPCODE_FRC:
5910 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
5911 return GL_FALSE;
5912 break;
5913
5914 case OPCODE_KIL:
5915 case OPCODE_KIL_NV:
5916 /* done at OPCODE_SE/SGT...etc. */
5917 /* if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
5918 return GL_FALSE; */
5919 break;
5920 case OPCODE_LG2:
5921 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
5922 return GL_FALSE;
5923 break;
5924 case OPCODE_LIT:
5925 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
5926 return GL_FALSE;
5927 break;
5928 case OPCODE_LRP:
5929 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
5930 return GL_FALSE;
5931 break;
5932 case OPCODE_LOG:
5933 if ( GL_FALSE == assemble_LOG(pR700AsmCode) )
5934 return GL_FALSE;
5935 break;
5936
5937 case OPCODE_MAD:
5938 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
5939 return GL_FALSE;
5940 break;
5941 case OPCODE_MAX:
5942 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
5943 return GL_FALSE;
5944 break;
5945 case OPCODE_MIN:
5946 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
5947 return GL_FALSE;
5948 break;
5949
5950 case OPCODE_MOV:
5951 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
5952 return GL_FALSE;
5953 break;
5954 case OPCODE_MUL:
5955 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
5956 return GL_FALSE;
5957 break;
5958
5959 case OPCODE_POW:
5960 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
5961 return GL_FALSE;
5962 break;
5963 case OPCODE_RCP:
5964 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
5965 return GL_FALSE;
5966 break;
5967 case OPCODE_RSQ:
5968 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
5969 return GL_FALSE;
5970 break;
5971 case OPCODE_SIN:
5972 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
5973 return GL_FALSE;
5974 break;
5975 case OPCODE_SCS:
5976 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
5977 return GL_FALSE;
5978 break;
5979
5980 case OPCODE_SEQ:
5981 if(OPCODE_IF == pILInst[i+1].Opcode)
5982 {
5983 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5984 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5985 {
5986 return GL_FALSE;
5987 }
5988 }
5989 else if(OPCODE_BRK == pILInst[i+1].Opcode)
5990 {
5991 #ifdef USE_CF_FOR_CONTINUE_BREAK
5992 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
5993 #else
5994 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
5995 #endif
5996 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
5997 {
5998 return GL_FALSE;
5999 }
6000 }
6001 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6002 {
6003 #ifdef USE_CF_FOR_CONTINUE_BREAK
6004 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6005 #else
6006 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6007 #endif
6008 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETE) )
6009 {
6010 return GL_FALSE;
6011 }
6012 }
6013 else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
6014 {
6015 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLE) )
6016 {
6017 return GL_FALSE;
6018 }
6019 }
6020 else
6021 {
6022 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETE) )
6023 {
6024 return GL_FALSE;
6025 }
6026 }
6027 break;
6028
6029 case OPCODE_SGT:
6030 if(OPCODE_IF == pILInst[i+1].Opcode)
6031 {
6032 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6033 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6034 {
6035 return GL_FALSE;
6036 }
6037 }
6038 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6039 {
6040 #ifdef USE_CF_FOR_CONTINUE_BREAK
6041 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6042 #else
6043 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6044 #endif
6045 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6046 {
6047 return GL_FALSE;
6048 }
6049 }
6050 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6051 {
6052 #ifdef USE_CF_FOR_CONTINUE_BREAK
6053 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6054 #else
6055 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6056 #endif
6057
6058 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6059 {
6060 return GL_FALSE;
6061 }
6062 }
6063 else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
6064 {
6065 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
6066 {
6067 return GL_FALSE;
6068 }
6069 }
6070 else
6071 {
6072 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
6073 {
6074 return GL_FALSE;
6075 }
6076 }
6077 break;
6078
6079 case OPCODE_SGE:
6080 if(OPCODE_IF == pILInst[i+1].Opcode)
6081 {
6082 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6083 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6084 {
6085 return GL_FALSE;
6086 }
6087 }
6088 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6089 {
6090 #ifdef USE_CF_FOR_CONTINUE_BREAK
6091 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6092 #else
6093 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6094 #endif
6095 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6096 {
6097 return GL_FALSE;
6098 }
6099 }
6100 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6101 {
6102 #ifdef USE_CF_FOR_CONTINUE_BREAK
6103 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6104 #else
6105 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6106 #endif
6107
6108 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6109 {
6110 return GL_FALSE;
6111 }
6112 }
6113 else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
6114 {
6115 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) )
6116 {
6117 return GL_FALSE;
6118 }
6119 }
6120 else
6121 {
6122 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
6123 {
6124 return GL_FALSE;
6125 }
6126 }
6127 break;
6128
6129 /* NO LT, LE, TODO : use GE => LE, GT => LT : reverse 2 src order would be simpliest. Or use SQ_CF_COND_FALSE for SQ_CF_COND_ACTIVE.*/
6130 case OPCODE_SLT:
6131 {
6132 struct prog_src_register SrcRegSave[2];
6133 SrcRegSave[0] = pILInst[i].SrcReg[0];
6134 SrcRegSave[1] = pILInst[i].SrcReg[1];
6135 pILInst[i].SrcReg[0] = SrcRegSave[1];
6136 pILInst[i].SrcReg[1] = SrcRegSave[0];
6137 if(OPCODE_IF == pILInst[i+1].Opcode)
6138 {
6139 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6140 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6141 {
6142 pILInst[i].SrcReg[0] = SrcRegSave[0];
6143 pILInst[i].SrcReg[1] = SrcRegSave[1];
6144 return GL_FALSE;
6145 }
6146 }
6147 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6148 {
6149 #ifdef USE_CF_FOR_CONTINUE_BREAK
6150 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6151 #else
6152 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6153 #endif
6154 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6155 {
6156 pILInst[i].SrcReg[0] = SrcRegSave[0];
6157 pILInst[i].SrcReg[1] = SrcRegSave[1];
6158 return GL_FALSE;
6159 }
6160 }
6161 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6162 {
6163 #ifdef USE_CF_FOR_CONTINUE_BREAK
6164 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6165 #else
6166 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6167 #endif
6168
6169 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGT) )
6170 {
6171 pILInst[i].SrcReg[0] = SrcRegSave[0];
6172 pILInst[i].SrcReg[1] = SrcRegSave[1];
6173 return GL_FALSE;
6174 }
6175 }
6176 else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
6177 {
6178 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGT) )
6179 {
6180 return GL_FALSE;
6181 }
6182 }
6183 else
6184 {
6185 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGT) )
6186 {
6187 pILInst[i].SrcReg[0] = SrcRegSave[0];
6188 pILInst[i].SrcReg[1] = SrcRegSave[1];
6189 return GL_FALSE;
6190 }
6191 }
6192 pILInst[i].SrcReg[0] = SrcRegSave[0];
6193 pILInst[i].SrcReg[1] = SrcRegSave[1];
6194 }
6195 break;
6196
6197 case OPCODE_SLE:
6198 {
6199 struct prog_src_register SrcRegSave[2];
6200 SrcRegSave[0] = pILInst[i].SrcReg[0];
6201 SrcRegSave[1] = pILInst[i].SrcReg[1];
6202 pILInst[i].SrcReg[0] = SrcRegSave[1];
6203 pILInst[i].SrcReg[1] = SrcRegSave[0];
6204 if(OPCODE_IF == pILInst[i+1].Opcode)
6205 {
6206 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6207 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6208 {
6209 pILInst[i].SrcReg[0] = SrcRegSave[0];
6210 pILInst[i].SrcReg[1] = SrcRegSave[1];
6211 return GL_FALSE;
6212 }
6213 }
6214 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6215 {
6216 #ifdef USE_CF_FOR_CONTINUE_BREAK
6217 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6218 #else
6219 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6220 #endif
6221 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6222 {
6223 pILInst[i].SrcReg[0] = SrcRegSave[0];
6224 pILInst[i].SrcReg[1] = SrcRegSave[1];
6225 return GL_FALSE;
6226 }
6227 }
6228 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6229 {
6230 #ifdef USE_CF_FOR_CONTINUE_BREAK
6231 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6232 #else
6233 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6234 #endif
6235
6236 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETGE) )
6237 {
6238 pILInst[i].SrcReg[0] = SrcRegSave[0];
6239 pILInst[i].SrcReg[1] = SrcRegSave[1];
6240 return GL_FALSE;
6241 }
6242 }
6243 else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
6244 {
6245 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLGE) )
6246 {
6247 return GL_FALSE;
6248 }
6249 }
6250 else
6251 {
6252 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETGE) )
6253 {
6254 pILInst[i].SrcReg[0] = SrcRegSave[0];
6255 pILInst[i].SrcReg[1] = SrcRegSave[1];
6256 return GL_FALSE;
6257 }
6258 }
6259 pILInst[i].SrcReg[0] = SrcRegSave[0];
6260 pILInst[i].SrcReg[1] = SrcRegSave[1];
6261 }
6262 break;
6263
6264 case OPCODE_SNE:
6265 if(OPCODE_IF == pILInst[i+1].Opcode)
6266 {
6267 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6268 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6269 {
6270 return GL_FALSE;
6271 }
6272 }
6273 else if(OPCODE_BRK == pILInst[i+1].Opcode)
6274 {
6275 #ifdef USE_CF_FOR_CONTINUE_BREAK
6276 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6277 #else
6278 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_BREAK;
6279 #endif
6280 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6281 {
6282 return GL_FALSE;
6283 }
6284 }
6285 else if(OPCODE_CONT == pILInst[i+1].Opcode)
6286 {
6287 #ifdef USE_CF_FOR_CONTINUE_BREAK
6288 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_PUSH_BEFORE;
6289 #else
6290 pR700AsmCode->alu_x_opcode = SQ_CF_INST_ALU_CONTINUE;
6291 #endif
6292 if ( GL_FALSE == assemble_LOGIC_PRED(pR700AsmCode, SQ_OP2_INST_PRED_SETNE) )
6293 {
6294 return GL_FALSE;
6295 }
6296 }
6297 else if((OPCODE_KIL == pILInst[i+1].Opcode)||(OPCODE_KIL_NV == pILInst[i+1].Opcode))
6298 {
6299 if ( GL_FALSE == assemble_KIL(pR700AsmCode, SQ_OP2_INST_KILLNE) )
6300 {
6301 return GL_FALSE;
6302 }
6303 }
6304 else
6305 {
6306 if ( GL_FALSE == assemble_LOGIC(pR700AsmCode, SQ_OP2_INST_SETNE) )
6307 {
6308 return GL_FALSE;
6309 }
6310 }
6311 break;
6312
6313 //case OP_STP:
6314 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
6315 // return GL_FALSE;
6316 // break;
6317
6318 case OPCODE_SWZ:
6319 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
6320 {
6321 return GL_FALSE;
6322 }
6323 else
6324 {
6325 if( (i+1)<uiNumberInsts )
6326 {
6327 if(OPCODE_END != pILInst[i+1].Opcode)
6328 {
6329 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
6330 {
6331 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
6332 }
6333 }
6334 }
6335 }
6336 break;
6337
6338 case OPCODE_TEX:
6339 case OPCODE_TXB:
6340 case OPCODE_TXP:
6341 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
6342 return GL_FALSE;
6343 break;
6344
6345 case OPCODE_TRUNC:
6346 if ( GL_FALSE == assemble_math_function(pR700AsmCode, SQ_OP2_INST_TRUNC) )
6347 return GL_FALSE;
6348 break;
6349
6350 case OPCODE_XPD:
6351 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
6352 return GL_FALSE;
6353 break;
6354
6355 case OPCODE_IF :
6356 {
6357 GLboolean bHasElse = GL_FALSE;
6358
6359 if(pILInst[pILInst[i].BranchTarget - 1].Opcode == OPCODE_ELSE)
6360 {
6361 bHasElse = GL_TRUE;
6362 }
6363
6364 if ( GL_FALSE == assemble_IF(pR700AsmCode, bHasElse) )
6365 {
6366 return GL_FALSE;
6367 }
6368 }
6369 break;
6370
6371 case OPCODE_ELSE :
6372 if ( GL_FALSE == assemble_ELSE(pR700AsmCode) )
6373 return GL_FALSE;
6374 break;
6375
6376 case OPCODE_ENDIF:
6377 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
6378 return GL_FALSE;
6379 break;
6380
6381 case OPCODE_BGNLOOP:
6382 if( GL_FALSE == assemble_BGNLOOP(pR700AsmCode) )
6383 {
6384 return GL_FALSE;
6385 }
6386 break;
6387
6388 case OPCODE_BRK:
6389 if( GL_FALSE == assemble_BRK(pR700AsmCode) )
6390 {
6391 return GL_FALSE;
6392 }
6393 break;
6394
6395 case OPCODE_CONT:
6396 if( GL_FALSE == assemble_CONT(pR700AsmCode) )
6397 {
6398 return GL_FALSE;
6399 }
6400 break;
6401
6402 case OPCODE_ENDLOOP:
6403 if( GL_FALSE == assemble_ENDLOOP(pR700AsmCode) )
6404 {
6405 return GL_FALSE;
6406 }
6407 break;
6408
6409 case OPCODE_BGNSUB:
6410 if( GL_FALSE == assemble_BGNSUB(pR700AsmCode, i) )
6411 {
6412 return GL_FALSE;
6413 }
6414 break;
6415
6416 case OPCODE_RET:
6417 if( GL_FALSE == assemble_RET(pR700AsmCode) )
6418 {
6419 return GL_FALSE;
6420 }
6421 break;
6422
6423 case OPCODE_CAL:
6424 if( GL_FALSE == assemble_CAL(pR700AsmCode,
6425 pILInst[i].BranchTarget,
6426 uiNumberInsts,
6427 pILInst) )
6428 {
6429 return GL_FALSE;
6430 }
6431 break;
6432
6433 //case OPCODE_EXPORT:
6434 // if ( GL_FALSE == assemble_EXPORT() )
6435 // return GL_FALSE;
6436 // break;
6437
6438 case OPCODE_ENDSUB:
6439 return assemble_ENDSUB(pR700AsmCode);
6440
6441 case OPCODE_END:
6442 //pR700AsmCode->uiCurInst = i;
6443 //This is to remaind that if in later exoort there is depth/stencil
6444 //export, we need a mov to re-arrange DST channel, where using a
6445 //psuedo inst, we will use this end inst to do it.
6446 return GL_TRUE;
6447
6448 default:
6449 radeon_error("internal: unknown instruction\n");
6450 return GL_FALSE;
6451 }
6452 }
6453
6454 return GL_TRUE;
6455 }
6456
6457 GLboolean InitShaderProgram(r700_AssemblerBase * pAsm)
6458 {
6459 setRetInLoopFlag(pAsm, SQ_SEL_0);
6460 return GL_TRUE;
6461 }
6462
6463 GLboolean RelocProgram(r700_AssemblerBase * pAsm)
6464 {
6465 GLuint i;
6466 GLuint unCFoffset;
6467 TypedShaderList * plstCFmain;
6468 TypedShaderList * plstCFsub;
6469
6470 R700ShaderInstruction * pInst;
6471 R700ControlFlowGenericClause * pCFInst;
6472
6473 plstCFmain = pAsm->CALLSTACK[0].plstCFInstructions_local;
6474
6475 /* remove flags init if they are not used */
6476 if((pAsm->unCFflags & HAS_LOOPRET) == 0)
6477 {
6478 R700ControlFlowALUClause * pCF_ALU;
6479 pInst = plstCFmain->pHead;
6480 while(pInst)
6481 {
6482 if(SIT_CF_ALU == pInst->m_ShaderInstType)
6483 {
6484 pCF_ALU = (R700ControlFlowALUClause *)pInst;
6485 if(1 == pCF_ALU->m_Word1.f.count)
6486 {
6487 pCF_ALU->m_Word1.f.cf_inst = SQ_CF_INST_NOP;
6488 }
6489 else
6490 {
6491 R700ALUInstruction * pALU = pCF_ALU->m_pLinkedALUInstruction;
6492
6493 pALU->m_pLinkedALUClause = NULL;
6494 pALU = (R700ALUInstruction *)(pALU->pNextInst);
6495 pALU->m_pLinkedALUClause = pCF_ALU;
6496 pCF_ALU->m_pLinkedALUInstruction = pALU;
6497
6498 pCF_ALU->m_Word1.f.count--;
6499 }
6500 break;
6501 }
6502 pInst = pInst->pNextInst;
6503 };
6504 }
6505
6506 if(0 == pAsm->unSubArrayPointer)
6507 {
6508 return GL_TRUE;
6509 }
6510
6511 if(pAsm->CALLSTACK[0].max > 0)
6512 {
6513 pAsm->pR700Shader->uStackSize = ((pAsm->CALLSTACK[0].max + 3)>>2) + 2;
6514 }
6515
6516 unCFoffset = plstCFmain->uNumOfNode;
6517
6518 /* Reloc subs */
6519 for(i=0; i<pAsm->unSubArrayPointer; i++)
6520 {
6521 pAsm->subs[i].unCFoffset = unCFoffset;
6522 plstCFsub = &(pAsm->subs[i].lstCFInstructions_local);
6523
6524 pInst = plstCFsub->pHead;
6525
6526 /* reloc instructions */
6527 while(pInst)
6528 {
6529 if(SIT_CF_GENERIC == pInst->m_ShaderInstType)
6530 {
6531 pCFInst = (R700ControlFlowGenericClause *)pInst;
6532
6533 switch (pCFInst->m_Word1.f.cf_inst)
6534 {
6535 case SQ_CF_INST_POP:
6536 case SQ_CF_INST_JUMP:
6537 case SQ_CF_INST_ELSE:
6538 case SQ_CF_INST_LOOP_END:
6539 case SQ_CF_INST_LOOP_START:
6540 case SQ_CF_INST_LOOP_START_NO_AL:
6541 case SQ_CF_INST_LOOP_CONTINUE:
6542 case SQ_CF_INST_LOOP_BREAK:
6543 pCFInst->m_Word0.f.addr += unCFoffset;
6544 break;
6545 default:
6546 break;
6547 }
6548 }
6549
6550 pInst->m_uIndex += unCFoffset;
6551
6552 pInst = pInst->pNextInst;
6553 };
6554
6555 /* Put sub into main */
6556 plstCFmain->pTail->pNextInst = plstCFsub->pHead;
6557 plstCFmain->pTail = plstCFsub->pTail;
6558 plstCFmain->uNumOfNode += plstCFsub->uNumOfNode;
6559
6560 unCFoffset += plstCFsub->uNumOfNode;
6561 }
6562
6563 /* reloc callers */
6564 for(i=0; i<pAsm->unCallerArrayPointer; i++)
6565 {
6566 pAsm->callers[i].cf_ptr->m_Word0.f.addr
6567 = pAsm->subs[pAsm->callers[i].subDescIndex].unCFoffset;
6568 }
6569
6570 return GL_TRUE;
6571 }
6572
6573 GLboolean Process_Export(r700_AssemblerBase* pAsm,
6574 GLuint type,
6575 GLuint export_starting_index,
6576 GLuint export_count,
6577 GLuint starting_register_number,
6578 GLboolean is_depth_export)
6579 {
6580 unsigned char ucWriteMask;
6581
6582 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
6583 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
6584
6585 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
6586
6587 switch (type)
6588 {
6589 case SQ_EXPORT_PIXEL:
6590 if(GL_TRUE == is_depth_export)
6591 {
6592 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
6593 }
6594 else
6595 {
6596 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
6597 }
6598 break;
6599
6600 case SQ_EXPORT_POS:
6601 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
6602 break;
6603
6604 case SQ_EXPORT_PARAM:
6605 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
6606 break;
6607
6608 default:
6609 radeon_error("Unknown export type: %d\n", type);
6610 return GL_FALSE;
6611 break;
6612 }
6613
6614 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
6615
6616 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
6617 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
6618 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
6619
6620 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
6621 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
6622 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
6623 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
6624 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
6625 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
6626
6627 if (export_count == 1)
6628 {
6629 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
6630 /* exports Z as a float into Red channel */
6631 if (GL_TRUE == is_depth_export)
6632 ucWriteMask = 0x1;
6633
6634 if( (ucWriteMask & 0x1) != 0)
6635 {
6636 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6637 }
6638 else
6639 {
6640 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
6641 }
6642 if( ((ucWriteMask>>1) & 0x1) != 0)
6643 {
6644 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6645 }
6646 else
6647 {
6648 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
6649 }
6650 if( ((ucWriteMask>>2) & 0x1) != 0)
6651 {
6652 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6653 }
6654 else
6655 {
6656 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
6657 }
6658 if( ((ucWriteMask>>3) & 0x1) != 0)
6659 {
6660 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6661 }
6662 else
6663 {
6664 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
6665 }
6666 }
6667 else
6668 {
6669 // This should only be used if all components for all registers have been written
6670 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
6671 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
6672 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
6673 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
6674 }
6675
6676 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
6677
6678 return GL_TRUE;
6679 }
6680
6681 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
6682 {
6683 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
6684 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
6685
6686 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
6687
6688 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
6689
6690 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
6691 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
6692 pAsm->D.dst.reg = pAsm->depth_export_register_number;
6693
6694 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
6695
6696 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
6697 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
6698 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
6699
6700 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
6701
6702 noneg_PVSSRC(&(pAsm->S[0].src));
6703
6704 if( GL_FALSE == next_ins(pAsm) )
6705 {
6706 return GL_FALSE;
6707 }
6708
6709 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
6710
6711 return GL_TRUE;
6712 }
6713
6714 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
6715 GLbitfield OutputsWritten)
6716 {
6717 unsigned int unBit;
6718 GLuint export_count = 0;
6719
6720 if(pR700AsmCode->depth_export_register_number >= 0)
6721 {
6722 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
6723 {
6724 return GL_FALSE;
6725 }
6726 }
6727
6728 unBit = 1 << FRAG_RESULT_COLOR;
6729 if(OutputsWritten & unBit)
6730 {
6731 if( GL_FALSE == Process_Export(pR700AsmCode,
6732 SQ_EXPORT_PIXEL,
6733 0,
6734 1,
6735 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
6736 GL_FALSE) )
6737 {
6738 return GL_FALSE;
6739 }
6740 export_count++;
6741 }
6742 unBit = 1 << FRAG_RESULT_DEPTH;
6743 if(OutputsWritten & unBit)
6744 {
6745 if( GL_FALSE == Process_Export(pR700AsmCode,
6746 SQ_EXPORT_PIXEL,
6747 0,
6748 1,
6749 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
6750 GL_TRUE))
6751 {
6752 return GL_FALSE;
6753 }
6754 export_count++;
6755 }
6756 /* Need to export something, otherwise we'll hang
6757 * results are undefined anyway */
6758 if(export_count == 0)
6759 {
6760 Process_Export(pR700AsmCode, SQ_EXPORT_PIXEL, 0, 1, 0, GL_FALSE);
6761 }
6762
6763 if(pR700AsmCode->cf_last_export_ptr != NULL)
6764 {
6765 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6766 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6767 }
6768
6769 return GL_TRUE;
6770 }
6771
6772 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
6773 GLbitfield OutputsWritten)
6774 {
6775 unsigned int unBit;
6776 unsigned int i;
6777
6778 GLuint export_starting_index = 0;
6779 GLuint export_count = pR700AsmCode->number_of_exports;
6780
6781 unBit = 1 << VERT_RESULT_HPOS;
6782 if(OutputsWritten & unBit)
6783 {
6784 if( GL_FALSE == Process_Export(pR700AsmCode,
6785 SQ_EXPORT_POS,
6786 export_starting_index,
6787 1,
6788 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
6789 GL_FALSE) )
6790 {
6791 return GL_FALSE;
6792 }
6793
6794 export_count--;
6795
6796 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6797 }
6798
6799 pR700AsmCode->number_of_exports = export_count;
6800
6801 unBit = 1 << VERT_RESULT_COL0;
6802 if(OutputsWritten & unBit)
6803 {
6804 if( GL_FALSE == Process_Export(pR700AsmCode,
6805 SQ_EXPORT_PARAM,
6806 export_starting_index,
6807 1,
6808 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
6809 GL_FALSE) )
6810 {
6811 return GL_FALSE;
6812 }
6813
6814 export_starting_index++;
6815 }
6816
6817 unBit = 1 << VERT_RESULT_COL1;
6818 if(OutputsWritten & unBit)
6819 {
6820 if( GL_FALSE == Process_Export(pR700AsmCode,
6821 SQ_EXPORT_PARAM,
6822 export_starting_index,
6823 1,
6824 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
6825 GL_FALSE) )
6826 {
6827 return GL_FALSE;
6828 }
6829
6830 export_starting_index++;
6831 }
6832
6833 unBit = 1 << VERT_RESULT_FOGC;
6834 if(OutputsWritten & unBit)
6835 {
6836 if( GL_FALSE == Process_Export(pR700AsmCode,
6837 SQ_EXPORT_PARAM,
6838 export_starting_index,
6839 1,
6840 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
6841 GL_FALSE) )
6842 {
6843 return GL_FALSE;
6844 }
6845
6846 export_starting_index++;
6847 }
6848
6849 for(i=0; i<8; i++)
6850 {
6851 unBit = 1 << (VERT_RESULT_TEX0 + i);
6852 if(OutputsWritten & unBit)
6853 {
6854 if( GL_FALSE == Process_Export(pR700AsmCode,
6855 SQ_EXPORT_PARAM,
6856 export_starting_index,
6857 1,
6858 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
6859 GL_FALSE) )
6860 {
6861 return GL_FALSE;
6862 }
6863
6864 export_starting_index++;
6865 }
6866 }
6867
6868 for(i=VERT_RESULT_VAR0; i<VERT_RESULT_MAX; i++)
6869 {
6870 unBit = 1 << i;
6871 if(OutputsWritten & unBit)
6872 {
6873 if( GL_FALSE == Process_Export(pR700AsmCode,
6874 SQ_EXPORT_PARAM,
6875 export_starting_index,
6876 1,
6877 pR700AsmCode->ucVP_OutputMap[i],
6878 GL_FALSE) )
6879 {
6880 return GL_FALSE;
6881 }
6882
6883 export_starting_index++;
6884 }
6885 }
6886
6887 // At least one param should be exported
6888 if (export_count)
6889 {
6890 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6891 }
6892 else
6893 {
6894 if( GL_FALSE == Process_Export(pR700AsmCode,
6895 SQ_EXPORT_PARAM,
6896 0,
6897 1,
6898 pR700AsmCode->starting_export_register_number,
6899 GL_FALSE) )
6900 {
6901 return GL_FALSE;
6902 }
6903
6904 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
6905 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
6906 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
6907 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
6908 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
6909 }
6910
6911 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
6912
6913 return GL_TRUE;
6914 }
6915
6916 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
6917 {
6918 FREE(pR700AsmCode->pucOutMask);
6919 FREE(pR700AsmCode->pInstDeps);
6920
6921 if(NULL != pR700AsmCode->subs)
6922 {
6923 FREE(pR700AsmCode->subs);
6924 }
6925 if(NULL != pR700AsmCode->callers)
6926 {
6927 FREE(pR700AsmCode->callers);
6928 }
6929
6930 return GL_TRUE;
6931 }
6932