ebd5ff106befb0248c5677e7ca103344adabac5c
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "r600_context.h"
37 #include "r700_debug.h"
38
39 #include "r700_assembler.h"
40
41 BITS addrmode_PVSDST(PVSDST * pPVSDST)
42 {
43 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
44 }
45
46 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
47 {
48 pPVSDST->addrmode0 = addrmode & 1;
49 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
50 }
51
52 void nomask_PVSDST(PVSDST * pPVSDST)
53 {
54 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
55 }
56
57 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
58 {
59 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
60 }
61
62 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
63 {
64 pPVSSRC->addrmode0 = addrmode & 1;
65 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
66 }
67
68
69 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
70 {
71 pPVSSRC->swizzlex =
72 pPVSSRC->swizzley =
73 pPVSSRC->swizzlez =
74 pPVSSRC->swizzlew = swz;
75 }
76
77 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
78 {
79 pPVSSRC->swizzlex = SQ_SEL_X;
80 pPVSSRC->swizzley = SQ_SEL_Y;
81 pPVSSRC->swizzlez = SQ_SEL_Z;
82 pPVSSRC->swizzlew = SQ_SEL_W;
83 }
84
85 void
86 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
87 {
88 switch (x)
89 {
90 case SQ_SEL_X: x = pPVSSRC->swizzlex;
91 break;
92 case SQ_SEL_Y: x = pPVSSRC->swizzley;
93 break;
94 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
95 break;
96 case SQ_SEL_W: x = pPVSSRC->swizzlew;
97 break;
98 default:;
99 }
100
101 switch (y)
102 {
103 case SQ_SEL_X: y = pPVSSRC->swizzlex;
104 break;
105 case SQ_SEL_Y: y = pPVSSRC->swizzley;
106 break;
107 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
108 break;
109 case SQ_SEL_W: y = pPVSSRC->swizzlew;
110 break;
111 default:;
112 }
113
114 switch (z)
115 {
116 case SQ_SEL_X: z = pPVSSRC->swizzlex;
117 break;
118 case SQ_SEL_Y: z = pPVSSRC->swizzley;
119 break;
120 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
121 break;
122 case SQ_SEL_W: z = pPVSSRC->swizzlew;
123 break;
124 default:;
125 }
126
127 switch (w)
128 {
129 case SQ_SEL_X: w = pPVSSRC->swizzlex;
130 break;
131 case SQ_SEL_Y: w = pPVSSRC->swizzley;
132 break;
133 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
134 break;
135 case SQ_SEL_W: w = pPVSSRC->swizzlew;
136 break;
137 default:;
138 }
139
140 pPVSSRC->swizzlex = x;
141 pPVSSRC->swizzley = y;
142 pPVSSRC->swizzlez = z;
143 pPVSSRC->swizzlew = w;
144 }
145
146 void neg_PVSSRC(PVSSRC* pPVSSRC)
147 {
148 pPVSSRC->negx = 1;
149 pPVSSRC->negy = 1;
150 pPVSSRC->negz = 1;
151 pPVSSRC->negw = 1;
152 }
153
154 void noneg_PVSSRC(PVSSRC* pPVSSRC)
155 {
156 pPVSSRC->negx = 0;
157 pPVSSRC->negy = 0;
158 pPVSSRC->negz = 0;
159 pPVSSRC->negw = 0;
160 }
161
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
164 {
165 pPVSSRC->negx = !pPVSSRC->negx;
166 pPVSSRC->negy = !pPVSSRC->negy;
167 pPVSSRC->negz = !pPVSSRC->negz;
168 pPVSSRC->negw = !pPVSSRC->negw;
169 }
170
171 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
172 {
173 switch (c)
174 {
175 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
176 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
177 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
178 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
179 default:;
180 }
181 }
182
183 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
184 {
185 switch (c)
186 {
187 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
188 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
189 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
190 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
191 default:;
192 }
193 }
194
195 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
196 {
197 return (pOutVTXFmt0->point_size |
198 pOutVTXFmt0->edge_flag |
199 pOutVTXFmt0->rta_index |
200 pOutVTXFmt0->kill_flag |
201 pOutVTXFmt0->viewport_index);
202 }
203
204 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
205 {
206 return (pFPOutFmt->depth |
207 pFPOutFmt->stencil_ref |
208 pFPOutFmt->mask |
209 pFPOutFmt->coverage_to_mask);
210 }
211
212 GLboolean is_reduction_opcode(PVSDWORD* dest)
213 {
214 if (dest->dst.op3 == 0)
215 {
216 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) )
217 {
218 return GL_TRUE;
219 }
220 }
221 return GL_FALSE;
222 }
223
224 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
225 {
226 GLuint format = FMT_INVALID;
227 GLuint uiElemSize = 0;
228
229 switch (eType)
230 {
231 case GL_BYTE:
232 case GL_UNSIGNED_BYTE:
233 uiElemSize = 1;
234 switch(nChannels)
235 {
236 case 1:
237 format = FMT_8; break;
238 case 2:
239 format = FMT_8_8; break;
240 case 3:
241 format = FMT_8_8_8; break;
242 case 4:
243 format = FMT_8_8_8_8; break;
244 default:
245 break;
246 }
247 break;
248
249 case GL_UNSIGNED_SHORT:
250 case GL_SHORT:
251 uiElemSize = 2;
252 switch(nChannels)
253 {
254 case 1:
255 format = FMT_16; break;
256 case 2:
257 format = FMT_16_16; break;
258 case 3:
259 format = FMT_16_16_16; break;
260 case 4:
261 format = FMT_16_16_16_16; break;
262 default:
263 break;
264 }
265 break;
266
267 case GL_UNSIGNED_INT:
268 case GL_INT:
269 uiElemSize = 4;
270 switch(nChannels)
271 {
272 case 1:
273 format = FMT_32; break;
274 case 2:
275 format = FMT_32_32; break;
276 case 3:
277 format = FMT_32_32_32; break;
278 case 4:
279 format = FMT_32_32_32_32; break;
280 default:
281 break;
282 }
283 break;
284
285 case GL_FLOAT:
286 uiElemSize = 4;
287 switch(nChannels)
288 {
289 case 1:
290 format = FMT_32_FLOAT; break;
291 case 2:
292 format = FMT_32_32_FLOAT; break;
293 case 3:
294 format = FMT_32_32_32_FLOAT; break;
295 case 4:
296 format = FMT_32_32_32_32_FLOAT; break;
297 default:
298 break;
299 }
300 break;
301 case GL_DOUBLE:
302 uiElemSize = 8;
303 switch(nChannels)
304 {
305 case 1:
306 format = FMT_32_FLOAT; break;
307 case 2:
308 format = FMT_32_32_FLOAT; break;
309 case 3:
310 format = FMT_32_32_32_FLOAT; break;
311 case 4:
312 format = FMT_32_32_32_32_FLOAT; break;
313 default:
314 break;
315 }
316 break;
317 default:
318 ;
319 //GL_ASSERT_NO_CASE();
320 }
321
322 if(NULL != pClient_size)
323 {
324 *pClient_size = uiElemSize * nChannels;
325 }
326
327 return(format);
328 }
329
330 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
331 {
332 if(pAsm->D.dst.op3)
333 {
334 return 3;
335 }
336
337 switch (pAsm->D.dst.opcode)
338 {
339 case SQ_OP2_INST_ADD:
340 case SQ_OP2_INST_MUL:
341 case SQ_OP2_INST_MAX:
342 case SQ_OP2_INST_MIN:
343 //case SQ_OP2_INST_MAX_DX10:
344 //case SQ_OP2_INST_MIN_DX10:
345 case SQ_OP2_INST_SETGT:
346 case SQ_OP2_INST_SETGE:
347 case SQ_OP2_INST_PRED_SETE:
348 case SQ_OP2_INST_PRED_SETGT:
349 case SQ_OP2_INST_PRED_SETGE:
350 case SQ_OP2_INST_PRED_SETNE:
351 case SQ_OP2_INST_DOT4:
352 case SQ_OP2_INST_DOT4_IEEE:
353 return 2;
354
355 case SQ_OP2_INST_MOV:
356 case SQ_OP2_INST_FRACT:
357 case SQ_OP2_INST_FLOOR:
358 case SQ_OP2_INST_KILLGT:
359 case SQ_OP2_INST_EXP_IEEE:
360 case SQ_OP2_INST_LOG_CLAMPED:
361 case SQ_OP2_INST_LOG_IEEE:
362 case SQ_OP2_INST_RECIP_IEEE:
363 case SQ_OP2_INST_RECIPSQRT_IEEE:
364 case SQ_OP2_INST_FLT_TO_INT:
365 case SQ_OP2_INST_SIN:
366 case SQ_OP2_INST_COS:
367 return 1;
368
369 default: r700_error(TODO_ASM_NEEDIMPINST,
370 "Need instruction operand number. \n");;
371 };
372
373 return 3;
374 }
375
376 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
377 {
378 GLuint i;
379
380 Init_R700_Shader(pShader);
381 pAsm->pR700Shader = pShader;
382 pAsm->currentShaderType = spt;
383
384 pAsm->cf_last_export_ptr = NULL;
385
386 pAsm->cf_current_export_clause_ptr = NULL;
387 pAsm->cf_current_alu_clause_ptr = NULL;
388 pAsm->cf_current_tex_clause_ptr = NULL;
389 pAsm->cf_current_vtx_clause_ptr = NULL;
390 pAsm->cf_current_cf_clause_ptr = NULL;
391
392 // No clause has been created yet
393 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
394
395 pAsm->number_of_colorandz_exports = 0;
396 pAsm->number_of_exports = 0;
397 pAsm->number_of_export_opcodes = 0;
398
399
400 pAsm->D.bits = 0;
401 pAsm->S[0].bits = 0;
402 pAsm->S[1].bits = 0;
403 pAsm->S[2].bits = 0;
404
405 pAsm->uLastPosUpdate = 0;
406
407 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
408
409 pAsm->uIIns = 0;
410 pAsm->uOIns = 0;
411 pAsm->number_used_registers = 0;
412 pAsm->uUsedConsts = 256;
413
414
415 // Fragment programs
416 pAsm->uBoolConsts = 0;
417 pAsm->uIntConsts = 0;
418 pAsm->uInsts = 0;
419 pAsm->uConsts = 0;
420
421 pAsm->FCSP = 0;
422 pAsm->fc_stack[0].type = FC_NONE;
423
424 pAsm->branch_depth = 0;
425 pAsm->max_branch_depth = 0;
426
427 pAsm->aArgSubst[0] =
428 pAsm->aArgSubst[1] =
429 pAsm->aArgSubst[2] =
430 pAsm->aArgSubst[3] = (-1);
431
432 pAsm->uOutputs = 0;
433
434 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
435 {
436 pAsm->color_export_register_number[i] = (-1);
437 }
438
439
440 pAsm->depth_export_register_number = (-1);
441 pAsm->stencil_export_register_number = (-1);
442 pAsm->coverage_to_mask_export_register_number = (-1);
443 pAsm->mask_export_register_number = (-1);
444
445 pAsm->starting_export_register_number = 0;
446 pAsm->starting_vfetch_register_number = 0;
447 pAsm->starting_temp_register_number = 0;
448 pAsm->uFirstHelpReg = 0;
449
450
451 pAsm->input_position_is_used = GL_FALSE;
452 pAsm->input_normal_is_used = GL_FALSE;
453
454
455 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
456 {
457 pAsm->input_color_is_used[ i ] = GL_FALSE;
458 }
459
460 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
461 {
462 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
463 }
464
465 for (i=0; i<VERT_ATTRIB_MAX; i++)
466 {
467 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
468 }
469
470 pAsm->number_of_inputs = 0;
471
472 return 0;
473 }
474
475 GLboolean IsTex(gl_inst_opcode Opcode)
476 {
477 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
478 {
479 return GL_TRUE;
480 }
481 return GL_FALSE;
482 }
483
484 GLboolean IsAlu(gl_inst_opcode Opcode)
485 {
486 //TODO : more for fc and ex for higher spec.
487 if( IsTex(Opcode) )
488 {
489 return GL_FALSE;
490 }
491 return GL_TRUE;
492 }
493
494 int check_current_clause(r700_AssemblerBase* pAsm,
495 CF_CLAUSE_TYPE new_clause_type)
496 {
497 if (pAsm->cf_current_clause_type != new_clause_type)
498 { //Close last open clause
499 switch (pAsm->cf_current_clause_type)
500 {
501 case CF_ALU_CLAUSE:
502 if ( pAsm->cf_current_alu_clause_ptr != NULL)
503 {
504 pAsm->cf_current_alu_clause_ptr = NULL;
505 }
506 break;
507 case CF_VTX_CLAUSE:
508 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
509 {
510 pAsm->cf_current_vtx_clause_ptr = NULL;
511 }
512 break;
513 case CF_TEX_CLAUSE:
514 if ( pAsm->cf_current_tex_clause_ptr != NULL)
515 {
516 pAsm->cf_current_tex_clause_ptr = NULL;
517 }
518 break;
519 case CF_EXPORT_CLAUSE:
520 if ( pAsm->cf_current_export_clause_ptr != NULL)
521 {
522 pAsm->cf_current_export_clause_ptr = NULL;
523 }
524 break;
525 case CF_OTHER_CLAUSE:
526 if ( pAsm->cf_current_cf_clause_ptr != NULL)
527 {
528 pAsm->cf_current_cf_clause_ptr = NULL;
529 }
530 break;
531 case CF_EMPTY_CLAUSE:
532 break;
533 default:
534 r700_error(ERROR_ASM_VTX_CLAUSE,
535 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
536 return GL_FALSE;
537 }
538
539 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
540
541 // Create new clause
542 switch (new_clause_type)
543 {
544 case CF_ALU_CLAUSE:
545 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
546 break;
547 case CF_VTX_CLAUSE:
548 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
549 break;
550 case CF_TEX_CLAUSE:
551 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
552 break;
553 case CF_EXPORT_CLAUSE:
554 {
555 R700ControlFlowSXClause* pR700ControlFlowSXClause
556 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
557
558 // Add new export instruction to control flow program
559 if (pR700ControlFlowSXClause != 0)
560 {
561 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
562 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
563 AddCFInstruction( pAsm->pR700Shader,
564 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
565 }
566 else
567 {
568 r700_error(ERROR_ASM_ALLOCEXPORTCF,
569 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
570 return GL_FALSE;
571 }
572 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
573 }
574 break;
575 case CF_EMPTY_CLAUSE:
576 break;
577 case CF_OTHER_CLAUSE:
578 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
579 break;
580 default:
581 r700_error(ERROR_ASM_UNKOWNCLAUSE,
582 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
583 return GL_FALSE;
584 }
585 }
586
587 return GL_TRUE;
588 }
589
590 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
591 R700VertexInstruction* vertex_instruction_ptr)
592 {
593 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
594 {
595 return GL_FALSE;
596 }
597
598 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
599 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
600 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
601 ) )
602 {
603 // Create new Vfetch control flow instruction for this new clause
604 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
605
606 if (pAsm->cf_current_vtx_clause_ptr != NULL)
607 {
608 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
609 AddCFInstruction( pAsm->pR700Shader,
610 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
611 }
612 else
613 {
614 r700_error(ERROR_ASM_ALLOCVTXCF, "Could not allocate a new VFetch CF instruction.");
615 return GL_FALSE;
616 }
617
618 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
619 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
620 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
621 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
622 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
623 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
624 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
625 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
626 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
627
628 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
629 }
630 else
631 {
632 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
633 }
634
635 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
636
637 return GL_TRUE;
638 }
639
640 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
641 R700TextureInstruction* tex_instruction_ptr)
642 {
643 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
644 {
645 return GL_FALSE;
646 }
647
648 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
649 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
650 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
651 ) )
652 {
653 // new tex cf instruction for this new clause
654 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
655
656 if (pAsm->cf_current_tex_clause_ptr != NULL)
657 {
658 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
659 AddCFInstruction( pAsm->pR700Shader,
660 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
661 }
662 else
663 {
664 r700_error(ERROR_ASM_ALLOCTEXCF, "Could not allocate a new TEX CF instruction.");
665 return GL_FALSE;
666 }
667
668 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
669 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
670 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
671
672 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
673 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
674 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
675 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
676 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
677 }
678 else
679 {
680 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
681 }
682
683 // If this clause constains any TEX instruction that is dependent on a previous instruction,
684 // set the barrier bit
685 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
686 {
687 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
688 }
689
690 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
691 {
692 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
693 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
694 }
695
696 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
697
698 return GL_TRUE;
699 }
700
701 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
702 GLuint gl_client_id,
703 GLuint destination_register,
704 GLuint number_of_elements,
705 GLenum dataElementType,
706 VTX_FETCH_METHOD* pFetchMethod)
707 {
708 GLuint client_size_inbyte;
709 GLuint data_format;
710 GLuint mega_fetch_count;
711 GLuint is_mega_fetch_flag;
712
713 R700VertexGenericFetch* vfetch_instruction_ptr;
714 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
715
716 if (assembled_vfetch_instruction_ptr == NULL)
717 {
718 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
719 if (vfetch_instruction_ptr == NULL)
720 {
721 return GL_FALSE;
722 }
723 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
724 }
725 else
726 {
727 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
728 }
729
730 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
731
732 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
733 {
734 //TODO : mini fetch
735 }
736 else
737 {
738 mega_fetch_count = MEGA_FETCH_BYTES - 1;
739 is_mega_fetch_flag = 0x1;
740 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
741 }
742
743 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
744 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
745 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
746
747 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
748 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
749 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
750 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
751 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
752
753 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
754 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
755 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
756 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
757
758 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
759
760 // Destination register
761 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
762 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
763
764 vfetch_instruction_ptr->m_Word2.f.offset = 0;
765 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
766
767 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
768
769 if (assembled_vfetch_instruction_ptr == NULL)
770 {
771 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
772 {
773 return GL_FALSE;
774 }
775
776 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
777 {
778 return GL_FALSE;
779 }
780 else
781 {
782 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
783 }
784 }
785
786 return GL_TRUE;
787 }
788
789 GLuint gethelpr(r700_AssemblerBase* pAsm)
790 {
791 GLuint r = pAsm->uHelpReg;
792 pAsm->uHelpReg++;
793 if (pAsm->uHelpReg > pAsm->number_used_registers)
794 {
795 pAsm->number_used_registers = pAsm->uHelpReg;
796 }
797 return r;
798 }
799 void resethelpr(r700_AssemblerBase* pAsm)
800 {
801 pAsm->uHelpReg = pAsm->uFirstHelpReg;
802 }
803
804 void checkop_init(r700_AssemblerBase* pAsm)
805 {
806 resethelpr(pAsm);
807 pAsm->aArgSubst[0] =
808 pAsm->aArgSubst[1] =
809 pAsm->aArgSubst[2] =
810 pAsm->aArgSubst[3] = -1;
811 }
812
813 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
814 {
815 GLuint tmp = gethelpr(pAsm);
816
817 //mov src to temp helper gpr.
818 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
819
820 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
821
822 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
823 pAsm->D.dst.reg = tmp;
824
825 nomask_PVSDST(&(pAsm->D.dst));
826
827 if( GL_FALSE == assemble_src(pAsm, src, 0) )
828 {
829 return GL_FALSE;
830 }
831
832 noswizzle_PVSSRC(&(pAsm->S[0].src));
833 noneg_PVSSRC(&(pAsm->S[0].src));
834
835 if( GL_FALSE == next_ins(pAsm) )
836 {
837 return GL_FALSE;
838 }
839
840 pAsm->aArgSubst[1 + src] = tmp;
841
842 return GL_TRUE;
843 }
844
845 GLboolean checkop1(r700_AssemblerBase* pAsm)
846 {
847 checkop_init(pAsm);
848 return GL_TRUE;
849 }
850
851 GLboolean checkop2(r700_AssemblerBase* pAsm)
852 {
853 GLboolean bSrcConst[2];
854 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
855
856 checkop_init(pAsm);
857
858 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
859 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
860 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
861 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
862 {
863 bSrcConst[0] = GL_TRUE;
864 }
865 else
866 {
867 bSrcConst[0] = GL_FALSE;
868 }
869 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
870 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
871 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
872 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
873 {
874 bSrcConst[1] = GL_TRUE;
875 }
876 else
877 {
878 bSrcConst[1] = GL_FALSE;
879 }
880
881 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
882 {
883 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
884 {
885 if( GL_FALSE == mov_temp(pAsm, 1) )
886 {
887 return GL_FALSE;
888 }
889 }
890 }
891
892 return GL_TRUE;
893 }
894
895 GLboolean checkop3(r700_AssemblerBase* pAsm)
896 {
897 GLboolean bSrcConst[3];
898 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
899
900 checkop_init(pAsm);
901
902 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
903 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
904 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
905 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
906 {
907 bSrcConst[0] = GL_TRUE;
908 }
909 else
910 {
911 bSrcConst[0] = GL_FALSE;
912 }
913 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
914 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
915 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
916 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
917 {
918 bSrcConst[1] = GL_TRUE;
919 }
920 else
921 {
922 bSrcConst[1] = GL_FALSE;
923 }
924 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
925 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
926 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
927 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
928 {
929 bSrcConst[2] = GL_TRUE;
930 }
931 else
932 {
933 bSrcConst[2] = GL_FALSE;
934 }
935
936 if( (GL_TRUE == bSrcConst[0]) &&
937 (GL_TRUE == bSrcConst[1]) &&
938 (GL_TRUE == bSrcConst[2]) )
939 {
940 if( GL_FALSE == mov_temp(pAsm, 1) )
941 {
942 return GL_FALSE;
943 }
944 if( GL_FALSE == mov_temp(pAsm, 2) )
945 {
946 return GL_FALSE;
947 }
948
949 return GL_TRUE;
950 }
951 else if( (GL_TRUE == bSrcConst[0]) &&
952 (GL_TRUE == bSrcConst[1]) )
953 {
954 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
955 {
956 if( GL_FALSE == mov_temp(pAsm, 1) )
957 {
958 return 1;
959 }
960 }
961
962 return GL_TRUE;
963 }
964 else if ( (GL_TRUE == bSrcConst[0]) &&
965 (GL_TRUE == bSrcConst[2]) )
966 {
967 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
968 {
969 if( GL_FALSE == mov_temp(pAsm, 2) )
970 {
971 return GL_FALSE;
972 }
973 }
974
975 return GL_TRUE;
976 }
977 else if( (GL_TRUE == bSrcConst[1]) &&
978 (GL_TRUE == bSrcConst[2]) )
979 {
980 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
981 {
982 if( GL_FALSE == mov_temp(pAsm, 2) )
983 {
984 return GL_FALSE;
985 }
986 }
987
988 return GL_TRUE;
989 }
990
991 return GL_TRUE;
992 }
993
994 GLboolean assemble_src(r700_AssemblerBase *pAsm,
995 int src,
996 int fld)
997 {
998 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
999
1000 if (fld == -1)
1001 {
1002 fld = src;
1003 }
1004
1005 if(pAsm->aArgSubst[1+src] >= 0)
1006 {
1007 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1008 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1009 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1010 }
1011 else
1012 {
1013 switch (pILInst->SrcReg[src].File)
1014 {
1015 case PROGRAM_TEMPORARY:
1016 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1017 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1018 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1019 break;
1020 case PROGRAM_CONSTANT:
1021 case PROGRAM_LOCAL_PARAM:
1022 case PROGRAM_ENV_PARAM:
1023 case PROGRAM_STATE_VAR:
1024 if (1 == pILInst->SrcReg[src].RelAddr)
1025 {
1026 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1027 }
1028 else
1029 {
1030 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1031 }
1032
1033 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1034 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1035 break;
1036 case PROGRAM_INPUT:
1037 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1038 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1039 switch (pAsm->currentShaderType)
1040 {
1041 case SPT_FP:
1042 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1043 break;
1044 case SPT_VP:
1045 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1046 break;
1047 }
1048 break;
1049 default:
1050 r700_error(ERROR_ASM_SRCARGUMENT, "Invalid source argument type");
1051 return GL_FALSE;
1052 }
1053 }
1054
1055 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1056 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1057 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1058 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1059
1060 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1061 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1062 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1063 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1064
1065 return GL_TRUE;
1066 }
1067
1068 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1069 {
1070 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1071 switch (pILInst->DstReg.File)
1072 {
1073 case PROGRAM_TEMPORARY:
1074 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1075 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1076 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1077 break;
1078 case PROGRAM_ADDRESS:
1079 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1080 pAsm->D.dst.rtype = DST_REG_A0;
1081 pAsm->D.dst.reg = 0;
1082 break;
1083 case PROGRAM_OUTPUT:
1084 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1085 pAsm->D.dst.rtype = DST_REG_OUT;
1086 switch (pAsm->currentShaderType)
1087 {
1088 case SPT_FP:
1089 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1090 break;
1091 case SPT_VP:
1092 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1093 break;
1094 }
1095 break;
1096 default:
1097 r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type");
1098 return GL_FALSE;
1099 }
1100
1101 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1102 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1103 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1104 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1105
1106 return GL_TRUE;
1107 }
1108
1109 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1110 {
1111 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1112
1113 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1114 {
1115 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1116 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1117
1118 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1119 }
1120 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1121 {
1122 pAsm->D.dst.rtype = DST_REG_OUT;
1123 switch (pAsm->currentShaderType)
1124 {
1125 case SPT_FP:
1126 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1127 break;
1128 case SPT_VP:
1129 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1130 break;
1131 }
1132
1133 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1134 }
1135 else
1136 {
1137 r700_error(ERROR_ASM_DSTARGUMENT, "Invalid destination output argument type");
1138 return GL_FALSE;
1139 }
1140
1141 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1142 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1143 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1144 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1145
1146 return GL_TRUE;
1147 }
1148
1149 GLboolean tex_src(r700_AssemblerBase *pAsm)
1150 {
1151 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1152
1153 GLboolean bValidTexCoord = GL_FALSE;
1154
1155 switch (pILInst->SrcReg[0].File)
1156 {
1157 case PROGRAM_TEMPORARY:
1158 bValidTexCoord = GL_TRUE;
1159
1160 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index + pAsm->starting_temp_register_number;
1161 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1162
1163 break;
1164 case PROGRAM_INPUT:
1165 switch (pILInst->SrcReg[0].Index)
1166 {
1167 case FRAG_ATTRIB_COL0:
1168 case FRAG_ATTRIB_COL1:
1169 case FRAG_ATTRIB_TEX0:
1170 case FRAG_ATTRIB_TEX1:
1171 case FRAG_ATTRIB_TEX2:
1172 case FRAG_ATTRIB_TEX3:
1173 case FRAG_ATTRIB_TEX4:
1174 case FRAG_ATTRIB_TEX5:
1175 case FRAG_ATTRIB_TEX6:
1176 case FRAG_ATTRIB_TEX7:
1177 bValidTexCoord = GL_TRUE;
1178
1179 pAsm->S[0].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1180 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1181 }
1182 break;
1183 }
1184
1185 if(GL_TRUE == bValidTexCoord)
1186 {
1187 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1188 }
1189 else
1190 {
1191 r700_error(ERROR_ASM_BADTEXSRC, "Invalid source texcoord for TEX instruction");
1192 return GL_FALSE;
1193 }
1194
1195 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1196 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1197 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1198 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1199
1200 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1201 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1202 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1203 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1204
1205 return GL_TRUE;
1206 }
1207
1208 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm)
1209 {
1210 PVSSRC * texture_coordinate_source;
1211 PVSSRC * texture_unit_source;
1212
1213 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1214 if (tex_instruction_ptr == NULL)
1215 {
1216 return GL_FALSE;
1217 }
1218 Init_R700TextureInstruction(tex_instruction_ptr);
1219
1220 texture_coordinate_source = &(pAsm->S[0].src);
1221 texture_unit_source = &(pAsm->S[1].src);
1222
1223 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1224 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1225 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1226
1227 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1228
1229 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1230 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1231 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1232 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1233 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1234
1235 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1236 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1237 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1238
1239 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1240
1241 // dst
1242 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1243 (pAsm->D.dst.rtype == DST_REG_OUT) )
1244 {
1245 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1246 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1247
1248 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1249 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1250
1251 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1252 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1253 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1254 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1255
1256
1257 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1258 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1259 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1260 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1261 }
1262 else
1263 {
1264 r700_error(ERROR_ASM_TEXDSTBADTYPE, "Only temp destination registers supported for TEX dest regs.");
1265 return GL_FALSE;
1266 }
1267
1268 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1269 {
1270 return GL_FALSE;
1271 }
1272
1273 return GL_TRUE;
1274 }
1275
1276 void initialize(r700_AssemblerBase *pAsm)
1277 {
1278 GLuint cycle, component;
1279
1280 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1281 {
1282 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1283 {
1284 pAsm->hw_gpr[cycle][component] = (-1);
1285 }
1286 }
1287 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1288 {
1289 pAsm->hw_cfile_addr[component] = (-1);
1290 pAsm->hw_cfile_chan[component] = (-1);
1291 }
1292 }
1293
1294 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1295 int source_index,
1296 PVSSRC* pSource,
1297 BITS scalar_channel_index)
1298 {
1299 BITS src_sel;
1300 BITS src_rel;
1301 BITS src_chan;
1302 BITS src_neg;
1303
1304 //--------------------------------------------------------------------------
1305 // Source for operands src0, src1.
1306 // Values [0,127] correspond to GPR[0..127].
1307 // Values [256,511] correspond to cfile constants c[0..255].
1308
1309 //--------------------------------------------------------------------------
1310 // Other special values are shown in the list below.
1311
1312 // 248 SQ_ALU_SRC_0: special constant 0.0.
1313 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1314
1315 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1316 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1317
1318 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1319 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1320
1321 // 254 SQ_ALU_SRC_PV: previous vector result.
1322 // 255 SQ_ALU_SRC_PS: previous scalar result.
1323 //--------------------------------------------------------------------------
1324
1325 BITS channel_swizzle;
1326 switch (scalar_channel_index)
1327 {
1328 case 0: channel_swizzle = pSource->swizzlex; break;
1329 case 1: channel_swizzle = pSource->swizzley; break;
1330 case 2: channel_swizzle = pSource->swizzlez; break;
1331 case 3: channel_swizzle = pSource->swizzlew; break;
1332 default: channel_swizzle = SQ_SEL_MASK; break;
1333 }
1334
1335 if(channel_swizzle == SQ_SEL_0)
1336 {
1337 src_sel = SQ_ALU_SRC_0;
1338 }
1339 else if (channel_swizzle == SQ_SEL_1)
1340 {
1341 src_sel = SQ_ALU_SRC_1;
1342 }
1343 else
1344 {
1345 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1346 (pSource->rtype == SRC_REG_INPUT)
1347 )
1348 {
1349 src_sel = pSource->reg;
1350 }
1351 else if (pSource->rtype == SRC_REG_CONSTANT)
1352 {
1353 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1354 }
1355 else
1356 {
1357 r700_error(ERROR_ASM_ALUSRCBADTYPE, "Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.",
1358 source_index, pSource->rtype);
1359 return GL_FALSE;
1360 }
1361 }
1362
1363 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1364 {
1365 src_rel = SQ_ABSOLUTE;
1366 }
1367 else
1368 {
1369 src_rel = SQ_RELATIVE;
1370 }
1371
1372 switch (channel_swizzle)
1373 {
1374 case SQ_SEL_X:
1375 src_chan = SQ_CHAN_X;
1376 break;
1377 case SQ_SEL_Y:
1378 src_chan = SQ_CHAN_Y;
1379 break;
1380 case SQ_SEL_Z:
1381 src_chan = SQ_CHAN_Z;
1382 break;
1383 case SQ_SEL_W:
1384 src_chan = SQ_CHAN_W;
1385 break;
1386 case SQ_SEL_0:
1387 case SQ_SEL_1:
1388 // Does not matter since src_sel controls
1389 src_chan = SQ_CHAN_X;
1390 break;
1391 default:
1392 r700_error(ERROR_ASM_ALUSRCSELECT, "Unknown source select value (%d) in assemble_alu_src().");
1393 return GL_FALSE;
1394 break;
1395 }
1396
1397 switch (scalar_channel_index)
1398 {
1399 case 0: src_neg = pSource->negx; break;
1400 case 1: src_neg = pSource->negy; break;
1401 case 2: src_neg = pSource->negz; break;
1402 case 3: src_neg = pSource->negw; break;
1403 default: src_neg = 0; break;
1404 }
1405
1406 switch (source_index)
1407 {
1408 case 0:
1409 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1410 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1411 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1412 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1413 break;
1414 case 1:
1415 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1416 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1417 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1418 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1419 break;
1420 case 2:
1421 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1422 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1423 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1424 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1425 break;
1426 default:
1427 r700_error(ERROR_ASM_ALUSRCNUMBER, "Only three sources allowed in ALU opcodes.");
1428 return GL_FALSE;
1429 break;
1430 }
1431
1432 return GL_TRUE;
1433 }
1434
1435 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1436 R700ALUInstruction* alu_instruction_ptr,
1437 GLuint contiguous_slots_needed)
1438 {
1439 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1440 {
1441 return GL_FALSE;
1442 }
1443
1444 if ( pAsm->cf_current_alu_clause_ptr == NULL ||
1445 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1446 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1447 ) )
1448 {
1449
1450 //new cf inst for this clause
1451 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1452
1453 // link the new cf to cf segment
1454 if(NULL != pAsm->cf_current_alu_clause_ptr)
1455 {
1456 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1457 AddCFInstruction( pAsm->pR700Shader,
1458 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1459 }
1460 else
1461 {
1462 r700_error(ERROR_ASM_ALLOCALUCF, "Could not allocate a new ALU CF instruction.");
1463 return GL_FALSE;
1464 }
1465
1466 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1467 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1468 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1469
1470 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1471 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1472 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1473
1474 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1475 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1476 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1477
1478 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1479
1480 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1481 }
1482 else
1483 {
1484 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1485 }
1486
1487 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1488 // set the whole_quad_mode for this clause
1489 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1490 {
1491 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1492 }
1493
1494 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1495 {
1496 alu_instruction_ptr->m_Word0.f.last = 1;
1497 }
1498
1499 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1500 {
1501 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1502 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1503 }
1504
1505 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1506
1507 return GL_TRUE;
1508 }
1509
1510 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1511 int source_index,
1512 BITS* psrc_sel,
1513 BITS* psrc_rel,
1514 BITS* psrc_chan,
1515 BITS* psrc_neg)
1516 {
1517 switch (source_index)
1518 {
1519 case 0:
1520 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1521 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1522 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1523 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1524 break;
1525
1526 case 1:
1527 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1528 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1529 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1530 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1531 break;
1532
1533 case 2:
1534 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1535 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1536 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1537 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1538 break;
1539 }
1540 }
1541
1542 int is_cfile(BITS sel)
1543 {
1544 if (sel > 255 && sel < 512)
1545 {
1546 return 1;
1547 }
1548 return 0;
1549 }
1550
1551 int is_const(BITS sel)
1552 {
1553 if (is_cfile(sel))
1554 {
1555 return 1;
1556 }
1557 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1558 {
1559 return 1;
1560 }
1561 return 0;
1562 }
1563
1564 int is_gpr(BITS sel)
1565 {
1566 if (sel >= 0 && sel < 128)
1567 {
1568 return 1;
1569 }
1570 return 0;
1571 }
1572
1573 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1574 SQ_ALU_VEC_120, //001
1575 SQ_ALU_VEC_102, //010
1576
1577 SQ_ALU_VEC_201, //011
1578 SQ_ALU_VEC_012, //100
1579 SQ_ALU_VEC_021, //101
1580
1581 SQ_ALU_VEC_012, //110
1582 SQ_ALU_VEC_012}; //111
1583
1584 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1585 SQ_ALU_SCL_122, //001
1586 SQ_ALU_SCL_122, //010
1587
1588 SQ_ALU_SCL_221, //011
1589 SQ_ALU_SCL_212, //100
1590 SQ_ALU_SCL_122, //101
1591
1592 SQ_ALU_SCL_122, //110
1593 SQ_ALU_SCL_122}; //111
1594
1595 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1596 GLuint sel,
1597 GLuint chan)
1598 {
1599 int res_match = (-1);
1600 int res_empty = (-1);
1601
1602 GLint res;
1603
1604 for (res=3; res>=0; res--)
1605 {
1606 if(pAsm->hw_cfile_addr[ res] < 0)
1607 {
1608 res_empty = res;
1609 }
1610 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1611 &&
1612 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1613 {
1614 res_match = res;
1615 }
1616 }
1617
1618 if(res_match >= 0)
1619 {
1620 // Read for this scalar component already reserved, nothing to do here.
1621 ;
1622 }
1623 else if(res_empty >= 0)
1624 {
1625 pAsm->hw_cfile_addr[ res_empty ] = sel;
1626 pAsm->hw_cfile_chan[ res_empty ] = chan;
1627 }
1628 else
1629 {
1630 r700_error(ERROR_ASM_CONSTCHANNEL, "All cfile read ports are used, cannot reference C$sel, channel $chan.");
1631 return GL_FALSE;
1632 }
1633 return GL_TRUE;
1634 }
1635
1636 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1637 {
1638 if(pAsm->hw_gpr[cycle][chan] < 0)
1639 {
1640 pAsm->hw_gpr[cycle][chan] = sel;
1641 }
1642 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1643 {
1644 r700_error(ERROR_ASM_BADGPRRESERVE, "Another scalar operation has already used GPR read port for given channel");
1645 return GL_FALSE;
1646 }
1647
1648 return GL_TRUE;
1649 }
1650
1651 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1652 {
1653 switch (swiz)
1654 {
1655 case SQ_ALU_SCL_210:
1656 {
1657 int table[3] = {2, 1, 0};
1658 *pCycle = table[sel];
1659 return GL_TRUE;
1660 }
1661 break;
1662 case SQ_ALU_SCL_122:
1663 {
1664 int table[3] = {1, 2, 2};
1665 *pCycle = table[sel];
1666 return GL_TRUE;
1667 }
1668 break;
1669 case SQ_ALU_SCL_212:
1670 {
1671 int table[3] = {2, 1, 2};
1672 *pCycle = table[sel];
1673 return GL_TRUE;
1674 }
1675 break;
1676 case SQ_ALU_SCL_221:
1677 {
1678 int table[3] = {2, 2, 1};
1679 *pCycle = table[sel];
1680 return GL_TRUE;
1681 }
1682 break;
1683 default:
1684 r700_error(ERROR_ASM_BADSCALARBZ, "Bad Scalar bank swizzle value");
1685 break;
1686 }
1687
1688 return GL_FALSE;
1689 }
1690
1691 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1692 {
1693 switch (swiz)
1694 {
1695 case SQ_ALU_VEC_012:
1696 {
1697 int table[3] = {0, 1, 2};
1698 *pCycle = table[sel];
1699 }
1700 break;
1701 case SQ_ALU_VEC_021:
1702 {
1703 int table[3] = {0, 2, 1};
1704 *pCycle = table[sel];
1705 }
1706 break;
1707 case SQ_ALU_VEC_120:
1708 {
1709 int table[3] = {1, 2, 0};
1710 *pCycle = table[sel];
1711 }
1712 break;
1713 case SQ_ALU_VEC_102:
1714 {
1715 int table[3] = {1, 0, 2};
1716 *pCycle = table[sel];
1717 }
1718 break;
1719 case SQ_ALU_VEC_201:
1720 {
1721 int table[3] = {2, 0, 1};
1722 *pCycle = table[sel];
1723 }
1724 break;
1725 case SQ_ALU_VEC_210:
1726 {
1727 int table[3] = {2, 1, 0};
1728 *pCycle = table[sel];
1729 }
1730 break;
1731 default:
1732 r700_error(ERROR_ASM_BADVECTORBZ, "Bad Vec bank swizzle value");
1733 return GL_FALSE;
1734 break;
1735 }
1736
1737 return GL_TRUE;
1738 }
1739
1740 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1741 R700ALUInstruction* alu_instruction_ptr)
1742 {
1743 GLuint cycle;
1744 GLuint bank_swizzle;
1745 GLuint const_count = 0;
1746
1747 BITS sel;
1748 BITS chan;
1749 BITS rel;
1750 BITS neg;
1751
1752 GLuint src;
1753
1754 BITS src_sel [3] = {0,0,0};
1755 BITS src_chan[3] = {0,0,0};
1756 BITS src_rel [3] = {0,0,0};
1757 BITS src_neg [3] = {0,0,0};
1758
1759 GLuint swizzle_key;
1760
1761 GLuint number_of_operands = r700GetNumOperands(pAsm);
1762
1763 for (src=0; src<number_of_operands; src++)
1764 {
1765 get_src_properties(alu_instruction_ptr,
1766 src,
1767 &(src_sel[src]),
1768 &(src_rel[src]),
1769 &(src_chan[src]),
1770 &(src_neg[src]) );
1771 }
1772
1773
1774 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1775 (is_const( src_sel[1] ) ? 2 : 0) +
1776 (is_const( src_sel[2] ) ? 1 : 0) );
1777
1778 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
1779
1780 for (src=0; src<number_of_operands; src++)
1781 {
1782 sel = src_sel [src];
1783 chan = src_chan[src];
1784 rel = src_rel [src];
1785 neg = src_neg [src];
1786
1787 if (is_const( sel ))
1788 {
1789 // Any constant, including literal and inline constants
1790 const_count++;
1791
1792 if (is_cfile( sel ))
1793 {
1794 reserve_cfile(pAsm, sel, chan);
1795 }
1796
1797 }
1798 }
1799
1800 for (src=0; src<number_of_operands; src++)
1801 {
1802 sel = src_sel [src];
1803 chan = src_chan[src];
1804 rel = src_rel [src];
1805 neg = src_neg [src];
1806
1807 if( is_gpr(sel) )
1808 {
1809 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1810
1811 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
1812 {
1813 return GL_FALSE;
1814 }
1815
1816 if(cycle < const_count)
1817 {
1818 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1819 {
1820 return GL_FALSE;
1821 }
1822 }
1823 }
1824 }
1825
1826 return GL_TRUE;
1827 }
1828
1829 GLboolean check_vector(r700_AssemblerBase* pAsm,
1830 R700ALUInstruction* alu_instruction_ptr)
1831 {
1832 GLuint cycle;
1833 GLuint bank_swizzle;
1834 GLuint const_count = 0;
1835
1836 GLuint src;
1837
1838 BITS sel;
1839 BITS chan;
1840 BITS rel;
1841 BITS neg;
1842
1843 BITS src_sel [3] = {0,0,0};
1844 BITS src_chan[3] = {0,0,0};
1845 BITS src_rel [3] = {0,0,0};
1846 BITS src_neg [3] = {0,0,0};
1847
1848 GLuint swizzle_key;
1849
1850 GLuint number_of_operands = r700GetNumOperands(pAsm);
1851
1852 for (src=0; src<number_of_operands; src++)
1853 {
1854 get_src_properties(alu_instruction_ptr,
1855 src,
1856 &(src_sel[src]),
1857 &(src_rel[src]),
1858 &(src_chan[src]),
1859 &(src_neg[src]) );
1860 }
1861
1862
1863 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1864 (is_const( src_sel[1] ) ? 2 : 0) +
1865 (is_const( src_sel[2] ) ? 1 : 0)
1866 );
1867
1868 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
1869
1870 for (src=0; src<number_of_operands; src++)
1871 {
1872 sel = src_sel [src];
1873 chan = src_chan[src];
1874 rel = src_rel [src];
1875 neg = src_neg [src];
1876
1877
1878 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1879
1880 if( is_gpr(sel) )
1881 {
1882 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
1883 {
1884 return GL_FALSE;
1885 }
1886
1887 if ( (src == 1) &&
1888 (sel == src_sel[0]) &&
1889 (chan == src_chan[0]) )
1890 {
1891 }
1892 else
1893 {
1894 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1895 {
1896 return GL_FALSE;
1897 }
1898 }
1899 }
1900 else if( is_const(sel) )
1901 {
1902 const_count++;
1903
1904 if( is_cfile(sel) )
1905 {
1906 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
1907 {
1908 return GL_FALSE;
1909 }
1910 }
1911 }
1912 }
1913
1914 return GL_TRUE;
1915 }
1916
1917 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
1918 {
1919 GLuint number_of_scalar_operations;
1920 GLboolean is_single_scalar_operation;
1921 GLuint scalar_channel_index;
1922
1923 PVSSRC * pcurrent_source;
1924 int current_source_index;
1925 GLuint contiguous_slots_needed;
1926
1927 GLuint uNumSrc = r700GetNumOperands(pAsm);
1928 GLuint channel_swizzle, j;
1929 GLuint chan_counter[4] = {0, 0, 0, 0};
1930 PVSSRC * pSource[3];
1931 GLboolean bSplitInst = GL_FALSE;
1932
1933 if (1 == pAsm->D.dst.math)
1934 {
1935 is_single_scalar_operation = GL_TRUE;
1936 number_of_scalar_operations = 1;
1937 }
1938 else
1939 {
1940 is_single_scalar_operation = GL_FALSE;
1941 number_of_scalar_operations = 4;
1942
1943 /* check read port, only very preliminary algorithm, not count in
1944 src0/1 same comp case and prev slot repeat case; also not count relative
1945 addressing. TODO: improve performance. */
1946 for(j=0; j<uNumSrc; j++)
1947 {
1948 pSource[j] = &(pAsm->S[j].src);
1949 }
1950 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
1951 {
1952 for(j=0; j<uNumSrc; j++)
1953 {
1954 switch (scalar_channel_index)
1955 {
1956 case 0: channel_swizzle = pSource[j]->swizzlex; break;
1957 case 1: channel_swizzle = pSource[j]->swizzley; break;
1958 case 2: channel_swizzle = pSource[j]->swizzlez; break;
1959 case 3: channel_swizzle = pSource[j]->swizzlew; break;
1960 default: channel_swizzle = SQ_SEL_MASK; break;
1961 }
1962 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
1963 (pSource[j]->rtype == SRC_REG_INPUT))
1964 && (channel_swizzle <= SQ_SEL_W) )
1965 {
1966 chan_counter[channel_swizzle]++;
1967 }
1968 }
1969 }
1970 if( (chan_counter[SQ_SEL_X] > 3)
1971 || (chan_counter[SQ_SEL_Y] > 3)
1972 || (chan_counter[SQ_SEL_Z] > 3)
1973 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
1974 {
1975 bSplitInst = GL_TRUE;
1976 }
1977 }
1978
1979 contiguous_slots_needed = 0;
1980
1981 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
1982 {
1983 contiguous_slots_needed = 4;
1984 }
1985
1986 initialize(pAsm);
1987
1988 for (scalar_channel_index=0;
1989 scalar_channel_index < number_of_scalar_operations;
1990 scalar_channel_index++)
1991 {
1992 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
1993 if (alu_instruction_ptr == NULL)
1994 {
1995 return GL_FALSE;
1996 }
1997 Init_R700ALUInstruction(alu_instruction_ptr);
1998
1999 //src 0
2000 current_source_index = 0;
2001 pcurrent_source = &(pAsm->S[0].src);
2002
2003 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2004 current_source_index,
2005 pcurrent_source,
2006 scalar_channel_index) )
2007 {
2008 return GL_FALSE;
2009 }
2010
2011 if (pAsm->D.dst.math == 0)
2012 {
2013 // Process source 1
2014 current_source_index = 1;
2015 pcurrent_source = &(pAsm->S[current_source_index].src);
2016
2017 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2018 current_source_index,
2019 pcurrent_source,
2020 scalar_channel_index) )
2021 {
2022 return GL_FALSE;
2023 }
2024 }
2025
2026 //other bits
2027 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2028
2029 if( (is_single_scalar_operation == GL_TRUE)
2030 || (GL_TRUE == bSplitInst) )
2031 {
2032 alu_instruction_ptr->m_Word0.f.last = 1;
2033 }
2034 else
2035 {
2036 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2037 }
2038
2039 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2040 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2041 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2042
2043 // dst
2044 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2045 (pAsm->D.dst.rtype == DST_REG_OUT) )
2046 {
2047 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2048 }
2049 else
2050 {
2051 r700_error(ERROR_ASM_ALUDSTBADTYPE, "Only temp destination registers supported for ALU dest regs.");
2052 return GL_FALSE;
2053 }
2054
2055 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2056
2057 if ( is_single_scalar_operation == GL_TRUE )
2058 {
2059 // Override scalar_channel_index since only one scalar value will be written
2060 if(pAsm->D.dst.writex)
2061 {
2062 scalar_channel_index = 0;
2063 }
2064 else if(pAsm->D.dst.writey)
2065 {
2066 scalar_channel_index = 1;
2067 }
2068 else if(pAsm->D.dst.writez)
2069 {
2070 scalar_channel_index = 2;
2071 }
2072 else if(pAsm->D.dst.writew)
2073 {
2074 scalar_channel_index = 3;
2075 }
2076 }
2077
2078 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2079
2080 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2081
2082 if (pAsm->D.dst.op3)
2083 {
2084 //op3
2085
2086 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2087
2088 //There's 3rd src for op3
2089 current_source_index = 2;
2090 pcurrent_source = &(pAsm->S[current_source_index].src);
2091
2092 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2093 current_source_index,
2094 pcurrent_source,
2095 scalar_channel_index) )
2096 {
2097 return GL_FALSE;
2098 }
2099 }
2100 else
2101 {
2102 //op2
2103 if (pAsm->bR6xx)
2104 {
2105 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2106
2107 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2108 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2109
2110 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2111 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2112 switch (scalar_channel_index)
2113 {
2114 case 0:
2115 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2116 break;
2117 case 1:
2118 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2119 break;
2120 case 2:
2121 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2122 break;
2123 case 3:
2124 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2125 break;
2126 default:
2127 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = SQ_SEL_MASK;
2128 break;
2129 }
2130 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2131 }
2132 else
2133 {
2134 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2135
2136 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2137 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2138
2139 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2140 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2141 switch (scalar_channel_index)
2142 {
2143 case 0:
2144 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2145 break;
2146 case 1:
2147 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2148 break;
2149 case 2:
2150 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2151 break;
2152 case 3:
2153 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2154 break;
2155 default:
2156 alu_instruction_ptr->m_Word1_OP2.f.write_mask = SQ_SEL_MASK;
2157 break;
2158 }
2159 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2160 }
2161 }
2162
2163 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2164 {
2165 return GL_FALSE;
2166 }
2167
2168 /*
2169 * Judge the type of current instruction, is it vector or scalar
2170 * instruction.
2171 */
2172 if (is_single_scalar_operation)
2173 {
2174 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2175 {
2176 return GL_FALSE;
2177 }
2178 }
2179 else
2180 {
2181 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2182 {
2183 return 1;
2184 }
2185 }
2186
2187 contiguous_slots_needed = 0;
2188 }
2189
2190 return GL_TRUE;
2191 }
2192
2193 GLboolean next_ins(r700_AssemblerBase *pAsm)
2194 {
2195 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2196
2197 if( GL_TRUE == IsTex(pILInst->Opcode) )
2198 {
2199 if( GL_FALSE == assemble_tex_instruction(pAsm) )
2200 {
2201 r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling TEX instruction");
2202 return GL_FALSE;
2203 }
2204 }
2205 else
2206 { //ALU
2207 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2208 {
2209 r700_error(ERROR_ASM_TEXINSTRUCTION, "Error assembling ALU instruction");
2210 return GL_FALSE;
2211 }
2212 }
2213
2214 if(pAsm->D.dst.rtype == DST_REG_OUT)
2215 {
2216 if(pAsm->D.dst.op3)
2217 {
2218 // There is no mask for OP3 instructions, so all channels are written
2219 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2220 }
2221 else
2222 {
2223 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2224 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2225 }
2226 }
2227
2228 //reset for next inst.
2229 pAsm->D.bits = 0;
2230 pAsm->S[0].bits = 0;
2231 pAsm->S[1].bits = 0;
2232 pAsm->S[2].bits = 0;
2233
2234 return GL_TRUE;
2235 }
2236
2237 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2238 {
2239 BITS tmp;
2240
2241 checkop1(pAsm);
2242
2243 tmp = gethelpr(pAsm);
2244
2245 // opcode tmp.x, a.x
2246 // MOV dst, tmp.x
2247
2248 pAsm->D.dst.opcode = opcode;
2249 pAsm->D.dst.math = 1;
2250
2251 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2252 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2253 pAsm->D.dst.reg = tmp;
2254 pAsm->D.dst.writex = 1;
2255
2256 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2257 {
2258 return GL_FALSE;
2259 }
2260
2261 if ( GL_FALSE == next_ins(pAsm) )
2262 {
2263 return GL_FALSE;
2264 }
2265
2266 // Now replicate result to all necessary channels in destination
2267 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2268
2269 if( GL_FALSE == assemble_dst(pAsm) )
2270 {
2271 return GL_FALSE;
2272 }
2273
2274 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2275 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2276 pAsm->S[0].src.reg = tmp;
2277
2278 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2279 noneg_PVSSRC(&(pAsm->S[0].src));
2280
2281 if( GL_FALSE == next_ins(pAsm) )
2282 {
2283 return GL_FALSE;
2284 }
2285
2286 return GL_TRUE;
2287 }
2288
2289 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2290 {
2291 checkop1(pAsm);
2292
2293 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2294
2295 if( GL_FALSE == assemble_dst(pAsm) )
2296 {
2297 return GL_FALSE;
2298 }
2299 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2300 {
2301 return GL_FALSE;
2302 }
2303
2304 pAsm->S[1].bits = pAsm->S[0].bits;
2305 flipneg_PVSSRC(&(pAsm->S[1].src));
2306
2307 if ( GL_FALSE == next_ins(pAsm) )
2308 {
2309 return GL_FALSE;
2310 }
2311
2312 return GL_TRUE;
2313 }
2314
2315 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2316 {
2317 if( GL_FALSE == checkop2(pAsm) )
2318 {
2319 return GL_FALSE;
2320 }
2321
2322 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2323
2324 if( GL_FALSE == assemble_dst(pAsm) )
2325 {
2326 return GL_FALSE;
2327 }
2328
2329 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2330 {
2331 return GL_FALSE;
2332 }
2333
2334 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2335 {
2336 return GL_FALSE;
2337 }
2338
2339 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2340 {
2341 flipneg_PVSSRC(&(pAsm->S[1].src));
2342 }
2343
2344 if( GL_FALSE == next_ins(pAsm) )
2345 {
2346 return GL_FALSE;
2347 }
2348
2349 return GL_TRUE;
2350 }
2351
2352 GLboolean assemble_BAD(char *opcode_str)
2353 {
2354 r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction (%s)", opcode_str);
2355 return GL_FALSE;
2356 }
2357
2358 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2359 {
2360 int tmp;
2361
2362 if( GL_FALSE == checkop3(pAsm) )
2363 {
2364 return GL_FALSE;
2365 }
2366
2367 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2368 pAsm->D.dst.op3 = 1;
2369
2370 tmp = (-1);
2371
2372 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2373 {
2374 //OP3 has no support for write mask
2375 tmp = gethelpr(pAsm);
2376
2377 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2378 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2379 pAsm->D.dst.reg = tmp;
2380
2381 nomask_PVSDST(&(pAsm->D.dst));
2382 }
2383 else
2384 {
2385 if( GL_FALSE == assemble_dst(pAsm) )
2386 {
2387 return GL_FALSE;
2388 }
2389 }
2390
2391 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2392 {
2393 return GL_FALSE;
2394 }
2395
2396 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2397 {
2398 return GL_FALSE;
2399 }
2400
2401 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2402 {
2403 return GL_FALSE;
2404 }
2405
2406 if ( GL_FALSE == next_ins(pAsm) )
2407 {
2408 return GL_FALSE;
2409 }
2410
2411 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2412 {
2413 if( GL_FALSE == assemble_dst(pAsm) )
2414 {
2415 return GL_FALSE;
2416 }
2417
2418 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2419
2420 //tmp for source
2421 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2422 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2423 pAsm->S[0].src.reg = tmp;
2424
2425 noneg_PVSSRC(&(pAsm->S[0].src));
2426 noswizzle_PVSSRC(&(pAsm->S[0].src));
2427
2428 if( GL_FALSE == next_ins(pAsm) )
2429 {
2430 return GL_FALSE;
2431 }
2432 }
2433
2434 return GL_TRUE;
2435 }
2436
2437 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
2438 {
2439 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
2440 }
2441
2442 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2443 {
2444 if( GL_FALSE == checkop2(pAsm) )
2445 {
2446 return GL_FALSE;
2447 }
2448
2449 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2450
2451 if( GL_FALSE == assemble_dst(pAsm) )
2452 {
2453 return GL_FALSE;
2454 }
2455
2456 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2457 {
2458 return GL_FALSE;
2459 }
2460
2461 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2462 {
2463 return GL_FALSE;
2464 }
2465
2466 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2467 {
2468 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2469 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2470 }
2471 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2472 {
2473 onecomp_PVSSRC(&(pAsm->S[1].src), 3);
2474 }
2475
2476 if ( GL_FALSE == next_ins(pAsm) )
2477 {
2478 return GL_FALSE;
2479 }
2480
2481 return GL_TRUE;
2482 }
2483
2484 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2485 {
2486 if( GL_FALSE == checkop2(pAsm) )
2487 {
2488 return GL_FALSE;
2489 }
2490
2491 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2492
2493 if( GL_FALSE == assemble_dst(pAsm) )
2494 {
2495 return GL_FALSE;
2496 }
2497
2498 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2499 {
2500 return GL_FALSE;
2501 }
2502
2503 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2504 {
2505 return GL_FALSE;
2506 }
2507
2508 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2509 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2510
2511 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2512 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2513
2514 if ( GL_FALSE == next_ins(pAsm) )
2515 {
2516 return GL_FALSE;
2517 }
2518
2519 return GL_TRUE;
2520 }
2521
2522 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2523 {
2524 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2525 }
2526
2527 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
2528 {
2529 checkop1(pAsm);
2530
2531 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2532
2533 if ( GL_FALSE == assemble_dst(pAsm) )
2534 {
2535 return GL_FALSE;
2536 }
2537
2538 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2539 {
2540 return GL_FALSE;
2541 }
2542
2543 if ( GL_FALSE == next_ins(pAsm) )
2544 {
2545 return GL_FALSE;
2546 }
2547
2548 return GL_TRUE;
2549 }
2550
2551 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
2552 {
2553 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
2554 }
2555
2556 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
2557 {
2558 checkop1(pAsm);
2559
2560 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2561
2562 if ( GL_FALSE == assemble_dst(pAsm) )
2563 {
2564 return GL_FALSE;
2565 }
2566
2567 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2568 {
2569 return GL_FALSE;
2570 }
2571
2572 if ( GL_FALSE == next_ins(pAsm) )
2573 {
2574 return GL_FALSE;
2575 }
2576
2577 return GL_TRUE;
2578 }
2579
2580 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
2581 {
2582 checkop1(pAsm);
2583
2584 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
2585
2586 if ( GL_FALSE == assemble_dst(pAsm) )
2587 {
2588 return GL_FALSE;
2589 }
2590
2591 pAsm->D.dst.writex = 0;
2592 pAsm->D.dst.writey = 0;
2593 pAsm->D.dst.writez = 0;
2594 pAsm->D.dst.writew = 0;
2595
2596 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2597 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2598 pAsm->S[0].src.reg = 0;
2599
2600 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
2601 noneg_PVSSRC(&(pAsm->S[0].src));
2602
2603 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2604
2605 if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
2606 {
2607 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
2608 }
2609 else
2610 { //PROGRAM_OUTPUT
2611 pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
2612 }
2613
2614 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2615 noswizzle_PVSSRC(&(pAsm->S[1].src));
2616
2617 if ( GL_FALSE == next_ins(pAsm) )
2618 {
2619 return GL_FALSE;
2620 }
2621
2622 pAsm->pR700Shader->killIsUsed = GL_TRUE;
2623
2624 return GL_TRUE;
2625 }
2626
2627 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
2628 {
2629 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
2630 }
2631
2632 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
2633 {
2634 BITS tmp;
2635
2636 if( GL_FALSE == checkop3(pAsm) )
2637 {
2638 return GL_FALSE;
2639 }
2640
2641 tmp = gethelpr(pAsm);
2642
2643 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2644
2645 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2646 pAsm->D.dst.reg = tmp;
2647 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2648 nomask_PVSDST(&(pAsm->D.dst));
2649
2650
2651 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
2652 {
2653 return GL_FALSE;
2654 }
2655
2656 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
2657 {
2658 return GL_FALSE;
2659 }
2660
2661 neg_PVSSRC(&(pAsm->S[1].src));
2662
2663 if( GL_FALSE == next_ins(pAsm) )
2664 {
2665 return GL_FALSE;
2666 }
2667
2668 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2669 pAsm->D.dst.op3 = 1;
2670
2671 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2672 pAsm->D.dst.reg = tmp;
2673 nomask_PVSDST(&(pAsm->D.dst));
2674 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2675
2676 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2677 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2678 pAsm->S[0].src.reg = tmp;
2679 noswizzle_PVSSRC(&(pAsm->S[0].src));
2680
2681
2682 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
2683 {
2684 return GL_FALSE;
2685 }
2686 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2687 {
2688 return GL_FALSE;
2689 }
2690
2691 if( GL_FALSE == next_ins(pAsm) )
2692 {
2693 return GL_FALSE;
2694 }
2695
2696 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2697
2698 if( GL_FALSE == assemble_dst(pAsm) )
2699 {
2700 return GL_FALSE;
2701 }
2702
2703 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2704 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2705 pAsm->S[0].src.reg = tmp;
2706 noswizzle_PVSSRC(&(pAsm->S[0].src));
2707
2708 if( GL_FALSE == next_ins(pAsm) )
2709 {
2710 return GL_FALSE;
2711 }
2712
2713 return GL_TRUE;
2714 }
2715
2716 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
2717 {
2718 int tmp, ii;
2719 GLboolean bReplaceDst = GL_FALSE;
2720 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2721
2722 if( GL_FALSE == checkop3(pAsm) )
2723 {
2724 return GL_FALSE;
2725 }
2726
2727 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2728 pAsm->D.dst.op3 = 1;
2729
2730 tmp = (-1);
2731
2732 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
2733 { /* TODO : more investigation on MAD src and dst using same register */
2734 for(ii=0; ii<3; ii++)
2735 {
2736 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
2737 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
2738 {
2739 bReplaceDst = GL_TRUE;
2740 break;
2741 }
2742 }
2743 }
2744 if(0xF != pILInst->DstReg.WriteMask)
2745 { /* OP3 has no support for write mask */
2746 bReplaceDst = GL_TRUE;
2747 }
2748
2749 if(GL_TRUE == bReplaceDst)
2750 {
2751 tmp = gethelpr(pAsm);
2752
2753 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2754 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2755 pAsm->D.dst.reg = tmp;
2756
2757 nomask_PVSDST(&(pAsm->D.dst));
2758 }
2759 else
2760 {
2761 if( GL_FALSE == assemble_dst(pAsm) )
2762 {
2763 return GL_FALSE;
2764 }
2765 }
2766
2767 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2768 {
2769 return GL_FALSE;
2770 }
2771
2772 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2773 {
2774 return GL_FALSE;
2775 }
2776
2777 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2778 {
2779 return GL_FALSE;
2780 }
2781
2782 if ( GL_FALSE == next_ins(pAsm) )
2783 {
2784 return GL_FALSE;
2785 }
2786
2787 if (GL_TRUE == bReplaceDst)
2788 {
2789 if( GL_FALSE == assemble_dst(pAsm) )
2790 {
2791 return GL_FALSE;
2792 }
2793
2794 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2795
2796 //tmp for source
2797 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2798 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2799 pAsm->S[0].src.reg = tmp;
2800
2801 noneg_PVSSRC(&(pAsm->S[0].src));
2802 noswizzle_PVSSRC(&(pAsm->S[0].src));
2803
2804 if( GL_FALSE == next_ins(pAsm) )
2805 {
2806 return GL_FALSE;
2807 }
2808 }
2809
2810 return GL_TRUE;
2811 }
2812
2813 /* LIT dst, src */
2814 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
2815 {
2816 unsigned int dstReg;
2817 unsigned int dstType;
2818 unsigned int srcReg;
2819 unsigned int srcType;
2820 checkop1(pAsm);
2821 int tmp = gethelpr(pAsm);
2822
2823 if( GL_FALSE == assemble_dst(pAsm) )
2824 {
2825 return GL_FALSE;
2826 }
2827 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2828 {
2829 return GL_FALSE;
2830 }
2831 dstReg = pAsm->D.dst.reg;
2832 dstType = pAsm->D.dst.rtype;
2833 srcReg = pAsm->S[0].src.reg;
2834 srcType = pAsm->S[0].src.rtype;
2835
2836 /* dst.xw, <- 1.0 */
2837 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2838 pAsm->D.dst.rtype = dstType;
2839 pAsm->D.dst.reg = dstReg;
2840 pAsm->D.dst.writex = 1;
2841 pAsm->D.dst.writey = 0;
2842 pAsm->D.dst.writez = 0;
2843 pAsm->D.dst.writew = 1;
2844 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2845 pAsm->S[0].src.reg = tmp;
2846 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2847 noneg_PVSSRC(&(pAsm->S[0].src));
2848 pAsm->S[0].src.swizzlex = SQ_SEL_1;
2849 pAsm->S[0].src.swizzley = SQ_SEL_1;
2850 pAsm->S[0].src.swizzlez = SQ_SEL_1;
2851 pAsm->S[0].src.swizzlew = SQ_SEL_1;
2852 if( GL_FALSE == next_ins(pAsm) )
2853 {
2854 return GL_FALSE;
2855 }
2856
2857 /* dst.y = max(src.x, 0.0) */
2858 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2859 pAsm->D.dst.rtype = dstType;
2860 pAsm->D.dst.reg = dstReg;
2861 pAsm->D.dst.writex = 0;
2862 pAsm->D.dst.writey = 1;
2863 pAsm->D.dst.writez = 0;
2864 pAsm->D.dst.writew = 0;
2865 pAsm->S[0].src.rtype = srcType;
2866 pAsm->S[0].src.reg = srcReg;
2867 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2868 noneg_PVSSRC(&(pAsm->S[0].src));
2869 pAsm->S[0].src.swizzlex = SQ_SEL_X;
2870 pAsm->S[0].src.swizzley = SQ_SEL_X;
2871 pAsm->S[0].src.swizzlez = SQ_SEL_X;
2872 pAsm->S[0].src.swizzlew = SQ_SEL_X;
2873 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2874 pAsm->S[1].src.reg = tmp;
2875 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2876 noneg_PVSSRC(&(pAsm->S[1].src));
2877 pAsm->S[1].src.swizzlex = SQ_SEL_0;
2878 pAsm->S[1].src.swizzley = SQ_SEL_0;
2879 pAsm->S[1].src.swizzlez = SQ_SEL_0;
2880 pAsm->S[1].src.swizzlew = SQ_SEL_0;
2881 if( GL_FALSE == next_ins(pAsm) )
2882 {
2883 return GL_FALSE;
2884 }
2885
2886 /* before: dst.w = log(src.y)
2887 * after : dst.x = log(src.y)
2888 * why change dest register is that dst.w has been initialized as 1 before
2889 */
2890 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
2891 pAsm->D.dst.math = 1;
2892 pAsm->D.dst.rtype = dstType;
2893 pAsm->D.dst.reg = dstReg;
2894 pAsm->D.dst.writex = 1;
2895 pAsm->D.dst.writey = 0;
2896 pAsm->D.dst.writez = 0;
2897 pAsm->D.dst.writew = 0;
2898 pAsm->S[0].src.rtype = srcType;
2899 pAsm->S[0].src.reg = srcReg;
2900 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2901 noneg_PVSSRC(&(pAsm->S[0].src));
2902 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
2903 pAsm->S[0].src.swizzley = SQ_SEL_Y;
2904 pAsm->S[0].src.swizzlez = SQ_SEL_Y;
2905 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
2906 if( GL_FALSE == next_ins(pAsm) )
2907 {
2908 return GL_FALSE;
2909 }
2910
2911 /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */
2912 /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */
2913 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
2914 pAsm->D.dst.op3 = 1;
2915 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2916 pAsm->D.dst.reg = tmp;
2917 pAsm->D.dst.writex = 1;
2918 pAsm->D.dst.writey = 0;
2919 pAsm->D.dst.writez = 0;
2920 pAsm->D.dst.writew = 0;
2921
2922 pAsm->S[0].src.rtype = srcType;
2923 pAsm->S[0].src.reg = srcReg;
2924 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2925 noneg_PVSSRC(&(pAsm->S[0].src));
2926 pAsm->S[0].src.swizzlex = SQ_SEL_W;
2927 pAsm->S[0].src.swizzley = SQ_SEL_W;
2928 pAsm->S[0].src.swizzlez = SQ_SEL_W;
2929 pAsm->S[0].src.swizzlew = SQ_SEL_W;
2930
2931 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2932 pAsm->S[1].src.reg = dstReg;
2933 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2934 noneg_PVSSRC(&(pAsm->S[1].src));
2935 pAsm->S[1].src.swizzlex = SQ_SEL_X;
2936 pAsm->S[1].src.swizzley = SQ_SEL_X;
2937 pAsm->S[1].src.swizzlez = SQ_SEL_X;
2938 pAsm->S[1].src.swizzlew = SQ_SEL_X;
2939
2940 pAsm->S[2].src.rtype = srcType;
2941 pAsm->S[2].src.reg = srcReg;
2942 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
2943 noneg_PVSSRC(&(pAsm->S[2].src));
2944 pAsm->S[2].src.swizzlex = SQ_SEL_X;
2945 pAsm->S[2].src.swizzley = SQ_SEL_X;
2946 pAsm->S[2].src.swizzlez = SQ_SEL_X;
2947 pAsm->S[2].src.swizzlew = SQ_SEL_X;
2948
2949 if( GL_FALSE == next_ins(pAsm) )
2950 {
2951 return GL_FALSE;
2952 }
2953
2954 /* dst.z = exp(tmp.x) */
2955 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2956 pAsm->D.dst.math = 1;
2957 pAsm->D.dst.rtype = dstType;
2958 pAsm->D.dst.reg = dstReg;
2959 pAsm->D.dst.writex = 0;
2960 pAsm->D.dst.writey = 0;
2961 pAsm->D.dst.writez = 1;
2962 pAsm->D.dst.writew = 0;
2963
2964 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2965 pAsm->S[0].src.reg = tmp;
2966 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2967 noneg_PVSSRC(&(pAsm->S[0].src));
2968 pAsm->S[0].src.swizzlex = SQ_SEL_X;
2969 pAsm->S[0].src.swizzley = SQ_SEL_X;
2970 pAsm->S[0].src.swizzlez = SQ_SEL_X;
2971 pAsm->S[0].src.swizzlew = SQ_SEL_X;
2972
2973 if( GL_FALSE == next_ins(pAsm) )
2974 {
2975 return GL_FALSE;
2976 }
2977
2978 return GL_TRUE;
2979 }
2980
2981 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
2982 {
2983 if( GL_FALSE == checkop2(pAsm) )
2984 {
2985 return GL_FALSE;
2986 }
2987
2988 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2989
2990 if( GL_FALSE == assemble_dst(pAsm) )
2991 {
2992 return GL_FALSE;
2993 }
2994
2995 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2996 {
2997 return GL_FALSE;
2998 }
2999
3000 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3001 {
3002 return GL_FALSE;
3003 }
3004
3005 if( GL_FALSE == next_ins(pAsm) )
3006 {
3007 return GL_FALSE;
3008 }
3009
3010 return GL_TRUE;
3011 }
3012
3013 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3014 {
3015 if( GL_FALSE == checkop2(pAsm) )
3016 {
3017 return GL_FALSE;
3018 }
3019
3020 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3021
3022 if( GL_FALSE == assemble_dst(pAsm) )
3023 {
3024 return GL_FALSE;
3025 }
3026
3027 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3028 {
3029 return GL_FALSE;
3030 }
3031
3032 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3033 {
3034 return GL_FALSE;
3035 }
3036
3037 if( GL_FALSE == next_ins(pAsm) )
3038 {
3039 return GL_FALSE;
3040 }
3041
3042 return GL_TRUE;
3043 }
3044
3045 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3046 {
3047 checkop1(pAsm);
3048
3049 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3050
3051 if (GL_FALSE == assemble_dst(pAsm))
3052 {
3053 return GL_FALSE;
3054 }
3055
3056 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3057 {
3058 return GL_FALSE;
3059 }
3060
3061 if ( GL_FALSE == next_ins(pAsm) )
3062 {
3063 return GL_FALSE;
3064 }
3065
3066 return GL_TRUE;
3067 }
3068
3069 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3070 {
3071 if( GL_FALSE == checkop2(pAsm) )
3072 {
3073 return GL_FALSE;
3074 }
3075
3076 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3077
3078 if( GL_FALSE == assemble_dst(pAsm) )
3079 {
3080 return GL_FALSE;
3081 }
3082
3083 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3084 {
3085 return GL_FALSE;
3086 }
3087
3088 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3089 {
3090 return GL_FALSE;
3091 }
3092
3093 if( GL_FALSE == next_ins(pAsm) )
3094 {
3095 return GL_FALSE;
3096 }
3097
3098 return GL_TRUE;
3099 }
3100
3101 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3102 {
3103 BITS tmp;
3104
3105 checkop1(pAsm);
3106
3107 tmp = gethelpr(pAsm);
3108
3109 // LG2 tmp.x, a.swizzle
3110 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3111 pAsm->D.dst.math = 1;
3112
3113 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3114 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3115 pAsm->D.dst.reg = tmp;
3116 nomask_PVSDST(&(pAsm->D.dst));
3117
3118 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3119 {
3120 return GL_FALSE;
3121 }
3122
3123 if( GL_FALSE == next_ins(pAsm) )
3124 {
3125 return GL_FALSE;
3126 }
3127
3128 // MUL tmp.x, tmp.x, b.swizzle
3129 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3130
3131 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3132 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3133 pAsm->D.dst.reg = tmp;
3134 nomask_PVSDST(&(pAsm->D.dst));
3135
3136 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3137 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3138 pAsm->S[0].src.reg = tmp;
3139 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3140 noneg_PVSSRC(&(pAsm->S[0].src));
3141
3142 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3143 {
3144 return GL_FALSE;
3145 }
3146
3147 if( GL_FALSE == next_ins(pAsm) )
3148 {
3149 return GL_FALSE;
3150 }
3151
3152 // EX2 dst.mask, tmp.x
3153 // EX2 tmp.x, tmp.x
3154 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3155 pAsm->D.dst.math = 1;
3156
3157 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3158 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3159 pAsm->D.dst.reg = tmp;
3160 nomask_PVSDST(&(pAsm->D.dst));
3161
3162 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3163 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3164 pAsm->S[0].src.reg = tmp;
3165 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3166 noneg_PVSSRC(&(pAsm->S[0].src));
3167
3168 if( GL_FALSE == next_ins(pAsm) )
3169 {
3170 return GL_FALSE;
3171 }
3172
3173 // Now replicate result to all necessary channels in destination
3174 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3175
3176 if( GL_FALSE == assemble_dst(pAsm) )
3177 {
3178 return GL_FALSE;
3179 }
3180
3181 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3182 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3183 pAsm->S[0].src.reg = tmp;
3184
3185 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3186 noneg_PVSSRC(&(pAsm->S[0].src));
3187
3188 if( GL_FALSE == next_ins(pAsm) )
3189 {
3190 return GL_FALSE;
3191 }
3192
3193 return GL_TRUE;
3194 }
3195
3196 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3197 {
3198 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3199 }
3200
3201 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3202 {
3203 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3204 }
3205
3206 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
3207 {
3208 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
3209 }
3210
3211 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3212 {
3213 BITS tmp;
3214
3215 checkop1(pAsm);
3216
3217 tmp = gethelpr(pAsm);
3218
3219 // COS tmp.x, a.x
3220 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
3221 pAsm->D.dst.math = 1;
3222
3223 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3224 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3225 pAsm->D.dst.reg = tmp;
3226 pAsm->D.dst.writex = 1;
3227
3228 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3229 {
3230 return GL_FALSE;
3231 }
3232
3233 if ( GL_FALSE == next_ins(pAsm) )
3234 {
3235 return GL_FALSE;
3236 }
3237
3238 // SIN tmp.y, a.x
3239 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
3240 pAsm->D.dst.math = 1;
3241
3242 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3243 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3244 pAsm->D.dst.reg = tmp;
3245 pAsm->D.dst.writey = 1;
3246
3247 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3248 {
3249 return GL_FALSE;
3250 }
3251
3252 if( GL_FALSE == next_ins(pAsm) )
3253 {
3254 return GL_FALSE;
3255 }
3256
3257 // MOV dst.mask, tmp
3258 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3259
3260 if( GL_FALSE == assemble_dst(pAsm) )
3261 {
3262 return GL_FALSE;
3263 }
3264
3265 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3266 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3267 pAsm->S[0].src.reg = tmp;
3268
3269 noswizzle_PVSSRC(&(pAsm->S[0].src));
3270 pAsm->S[0].src.swizzlez = SQ_SEL_0;
3271 pAsm->S[0].src.swizzlew = SQ_SEL_0;
3272
3273 if ( GL_FALSE == next_ins(pAsm) )
3274 {
3275 return GL_FALSE;
3276 }
3277
3278 return GL_TRUE;
3279 }
3280
3281 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
3282 {
3283 if( GL_FALSE == checkop2(pAsm) )
3284 {
3285 return GL_FALSE;
3286 }
3287
3288 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
3289
3290 if( GL_FALSE == assemble_dst(pAsm) )
3291 {
3292 return GL_FALSE;
3293 }
3294
3295 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3296 {
3297 return GL_FALSE;
3298 }
3299
3300 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3301 {
3302 return GL_FALSE;
3303 }
3304
3305 if( GL_FALSE == next_ins(pAsm) )
3306 {
3307 return GL_FALSE;
3308 }
3309
3310 return GL_TRUE;
3311 }
3312
3313 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
3314 {
3315 if( GL_FALSE == checkop2(pAsm) )
3316 {
3317 return GL_FALSE;
3318 }
3319
3320 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
3321
3322 if( GL_FALSE == assemble_dst(pAsm) )
3323 {
3324 return GL_FALSE;
3325 }
3326
3327 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3328 {
3329 return GL_FALSE;
3330 }
3331
3332 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3333 {
3334 return GL_FALSE;
3335 }
3336
3337 if( GL_FALSE == next_ins(pAsm) )
3338 {
3339 return GL_FALSE;
3340 }
3341
3342 return GL_TRUE;
3343 }
3344
3345 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
3346 {
3347 return GL_TRUE;
3348 }
3349
3350 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
3351 {
3352 GLboolean src_const;
3353
3354 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
3355 {
3356 case PROGRAM_CONSTANT:
3357 case PROGRAM_LOCAL_PARAM:
3358 case PROGRAM_ENV_PARAM:
3359 case PROGRAM_STATE_VAR:
3360 src_const = GL_TRUE;
3361 case PROGRAM_TEMPORARY:
3362 case PROGRAM_INPUT:
3363 src_const = GL_FALSE;
3364 }
3365
3366 if (GL_TRUE == src_const)
3367 {
3368 r700_error(TODO_ASM_CONSTTEXADDR, "TODO: Texture coordinates from a constant register not supported.");
3369 return GL_FALSE;
3370 }
3371
3372 switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
3373 {
3374 case OPCODE_TEX:
3375 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
3376 break;
3377 case OPCODE_TXB:
3378 r700_error(TODO_ASM_TXB, "do not support TXB yet");
3379 return GL_FALSE;
3380 break;
3381 case OPCODE_TXP:
3382 /* TODO : tex proj version : divid first 3 components by 4th */
3383 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
3384 break;
3385 default:
3386 r700_error(ERROR_ASM_BADTEXINST, "Internal error: bad texture op (not TEX)");
3387 return GL_FALSE;
3388 break;
3389 }
3390
3391 // Set src1 to tex unit id
3392 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
3393 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3394
3395 //No sw info from mesa compiler, so hard code here.
3396 pAsm->S[1].src.swizzlex = SQ_SEL_X;
3397 pAsm->S[1].src.swizzley = SQ_SEL_Y;
3398 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3399 pAsm->S[1].src.swizzlew = SQ_SEL_W;
3400
3401 if( GL_FALSE == tex_dst(pAsm) )
3402 {
3403 return GL_FALSE;
3404 }
3405
3406 if( GL_FALSE == tex_src(pAsm) )
3407 {
3408 return GL_FALSE;
3409 }
3410
3411 if ( GL_FALSE == next_ins(pAsm) )
3412 {
3413 return GL_FALSE;
3414 }
3415
3416 return GL_TRUE;
3417 }
3418
3419 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
3420 {
3421 BITS tmp;
3422
3423 if( GL_FALSE == checkop2(pAsm) )
3424 {
3425 return GL_FALSE;
3426 }
3427
3428 tmp = gethelpr(pAsm);
3429
3430 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3431
3432 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3433 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3434 pAsm->D.dst.reg = tmp;
3435 nomask_PVSDST(&(pAsm->D.dst));
3436
3437 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3438 {
3439 return GL_FALSE;
3440 }
3441
3442 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3443 {
3444 return GL_FALSE;
3445 }
3446
3447 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3448 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3449
3450 if( GL_FALSE == next_ins(pAsm) )
3451 {
3452 return GL_FALSE;
3453 }
3454
3455 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3456 pAsm->D.dst.op3 = 1;
3457
3458 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3459 {
3460 tmp = gethelpr(pAsm);
3461
3462 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3463 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3464 pAsm->D.dst.reg = tmp;
3465
3466 nomask_PVSDST(&(pAsm->D.dst));
3467 }
3468 else
3469 {
3470 if( GL_FALSE == assemble_dst(pAsm) )
3471 {
3472 return GL_FALSE;
3473 }
3474 }
3475
3476 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3477 {
3478 return GL_FALSE;
3479 }
3480
3481 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3482 {
3483 return GL_FALSE;
3484 }
3485
3486 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3487 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3488
3489 // result1 + (neg) result0
3490 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
3491 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
3492 pAsm->S[2].src.reg = tmp;
3493
3494 neg_PVSSRC(&(pAsm->S[2].src));
3495 noswizzle_PVSSRC(&(pAsm->S[2].src));
3496
3497 if( GL_FALSE == next_ins(pAsm) )
3498 {
3499 return GL_FALSE;
3500 }
3501
3502
3503 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3504 {
3505 if( GL_FALSE == assemble_dst(pAsm) )
3506 {
3507 return GL_FALSE;
3508 }
3509
3510 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3511
3512 // Use tmp as source
3513 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3514 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3515 pAsm->S[0].src.reg = tmp;
3516
3517 noneg_PVSSRC(&(pAsm->S[0].src));
3518 noswizzle_PVSSRC(&(pAsm->S[0].src));
3519
3520 if( GL_FALSE == next_ins(pAsm) )
3521 {
3522 return GL_FALSE;
3523 }
3524 }
3525
3526 return GL_TRUE;
3527 }
3528
3529 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
3530 {
3531 return GL_TRUE;
3532 }
3533
3534 GLboolean assemble_IF(r700_AssemblerBase *pAsm)
3535 {
3536 return GL_TRUE;
3537 }
3538
3539 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
3540 {
3541 return GL_TRUE;
3542 }
3543
3544 GLboolean AssembleInstr(GLuint uiNumberInsts,
3545 struct prog_instruction *pILInst,
3546 r700_AssemblerBase *pR700AsmCode)
3547 {
3548 GLuint i;
3549
3550 pR700AsmCode->pILInst = pILInst;
3551 for(i=0; i<uiNumberInsts; i++)
3552 {
3553 pR700AsmCode->uiCurInst = i;
3554
3555 switch (pILInst[i].Opcode)
3556 {
3557 case OPCODE_ABS:
3558 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
3559 return GL_FALSE;
3560 break;
3561 case OPCODE_ADD:
3562 case OPCODE_SUB:
3563 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
3564 return GL_FALSE;
3565 break;
3566
3567 case OPCODE_ARL:
3568 r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARL ");
3569 //if ( GL_FALSE == assemble_BAD("ARL") )
3570 return GL_FALSE;
3571 break;
3572 case OPCODE_ARR:
3573 r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ARR ");
3574 //if ( GL_FALSE == assemble_BAD("ARR") )
3575 return GL_FALSE;
3576 break;
3577
3578 case OPCODE_CMP:
3579 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
3580 return GL_FALSE;
3581 break;
3582 case OPCODE_COS:
3583 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
3584 return GL_FALSE;
3585 break;
3586
3587 case OPCODE_DP3:
3588 case OPCODE_DP4:
3589 case OPCODE_DPH:
3590 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
3591 return GL_FALSE;
3592 break;
3593
3594 case OPCODE_DST:
3595 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
3596 return GL_FALSE;
3597 break;
3598
3599 case OPCODE_EX2:
3600 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
3601 return GL_FALSE;
3602 break;
3603 case OPCODE_EXP:
3604 r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_EXP ");
3605 //if ( GL_FALSE == assemble_BAD("EXP") )
3606 return GL_FALSE;
3607 break; // approx of EX2
3608
3609 case OPCODE_FLR:
3610 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
3611 return GL_FALSE;
3612 break;
3613 //case OP_FLR_INT:
3614 // if ( GL_FALSE == assemble_FLR_INT() )
3615 // return GL_FALSE;
3616 // break;
3617
3618 case OPCODE_FRC:
3619 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
3620 return GL_FALSE;
3621 break;
3622
3623 case OPCODE_KIL:
3624 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
3625 return GL_FALSE;
3626 break;
3627 case OPCODE_LG2:
3628 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
3629 return GL_FALSE;
3630 break;
3631 case OPCODE_LIT:
3632 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
3633 return GL_FALSE;
3634 break;
3635 case OPCODE_LRP:
3636 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
3637 return GL_FALSE;
3638 break;
3639 case OPCODE_LOG:
3640 r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_LOG ");
3641 //if ( GL_FALSE == assemble_BAD("LOG") )
3642 return GL_FALSE;
3643 break; // approx of LG2
3644
3645 case OPCODE_MAD:
3646 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
3647 return GL_FALSE;
3648 break;
3649 case OPCODE_MAX:
3650 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
3651 return GL_FALSE;
3652 break;
3653 case OPCODE_MIN:
3654 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
3655 return GL_FALSE;
3656 break;
3657
3658 case OPCODE_MOV:
3659 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3660 return GL_FALSE;
3661 break;
3662 case OPCODE_MUL:
3663 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
3664 return GL_FALSE;
3665 break;
3666
3667 case OPCODE_POW:
3668 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
3669 return GL_FALSE;
3670 break;
3671 case OPCODE_RCP:
3672 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
3673 return GL_FALSE;
3674 break;
3675 case OPCODE_RSQ:
3676 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
3677 return GL_FALSE;
3678 break;
3679 case OPCODE_SIN:
3680 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
3681 return GL_FALSE;
3682 break;
3683 case OPCODE_SCS:
3684 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
3685 return GL_FALSE;
3686 break;
3687
3688 case OPCODE_SGE:
3689 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
3690 return GL_FALSE;
3691 break;
3692 case OPCODE_SLT:
3693 if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
3694 return GL_FALSE;
3695 break;
3696
3697 //case OP_STP:
3698 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
3699 // return GL_FALSE;
3700 // break;
3701
3702 case OPCODE_SWZ:
3703 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3704 {
3705 return GL_FALSE;
3706 }
3707 else
3708 {
3709 if( (i+1)<uiNumberInsts )
3710 {
3711 if(OPCODE_END != pILInst[i+1].Opcode)
3712 {
3713 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
3714 {
3715 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
3716 }
3717 }
3718 }
3719 }
3720 break;
3721
3722 case OPCODE_TEX:
3723 case OPCODE_TXB:
3724 case OPCODE_TXP:
3725 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
3726 return GL_FALSE;
3727 break;
3728
3729 case OPCODE_XPD:
3730 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
3731 return GL_FALSE;
3732 break;
3733
3734 case OPCODE_IF :
3735 if ( GL_FALSE == assemble_IF(pR700AsmCode) )
3736 return GL_FALSE;
3737 break;
3738 case OPCODE_ELSE :
3739 r700_error(TODO_ASM_NEEDIMPINST, "Not yet implemented instruction OPCODE_ELSE ");
3740 //if ( GL_FALSE == assemble_BAD("ELSE") )
3741 return GL_FALSE;
3742 break;
3743 case OPCODE_ENDIF:
3744 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
3745 return GL_FALSE;
3746 break;
3747
3748 //case OPCODE_EXPORT:
3749 // if ( GL_FALSE == assemble_EXPORT() )
3750 // return GL_FALSE;
3751 // break;
3752
3753 case OPCODE_END:
3754 //pR700AsmCode->uiCurInst = i;
3755 //This is to remaind that if in later exoort there is depth/stencil
3756 //export, we need a mov to re-arrange DST channel, where using a
3757 //psuedo inst, we will use this end inst to do it.
3758 return GL_TRUE;
3759
3760 default:
3761 r700_error(ERROR_ASM_UNKNOWNILINST, "internal: unknown instruction");
3762 return GL_FALSE;
3763 }
3764 }
3765
3766 return GL_TRUE;
3767 }
3768
3769 GLboolean Process_Export(r700_AssemblerBase* pAsm,
3770 GLuint type,
3771 GLuint export_starting_index,
3772 GLuint export_count,
3773 GLuint starting_register_number,
3774 GLboolean is_depth_export)
3775 {
3776 unsigned char ucWriteMask;
3777
3778 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
3779 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
3780
3781 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
3782
3783 switch (type)
3784 {
3785 case SQ_EXPORT_PIXEL:
3786 if(GL_TRUE == is_depth_export)
3787 {
3788 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
3789 }
3790 else
3791 {
3792 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
3793 }
3794 break;
3795
3796 case SQ_EXPORT_POS:
3797 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
3798 break;
3799
3800 case SQ_EXPORT_PARAM:
3801 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
3802 break;
3803
3804 default:
3805 r700_error(ERROR_ASM_BADEXPORTTYPE, "Unknown export type: %d", type);
3806 return GL_FALSE;
3807 break;
3808 }
3809
3810 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
3811
3812 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
3813 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
3814 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
3815
3816 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
3817 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
3818 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
3819 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
3820 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
3821 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
3822
3823 if (export_count == 1)
3824 {
3825 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
3826
3827 if( (ucWriteMask & 0x1) != 0)
3828 {
3829 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
3830 }
3831 else
3832 {
3833 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
3834 }
3835 if( ((ucWriteMask>>1) & 0x1) != 0)
3836 {
3837 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
3838 }
3839 else
3840 {
3841 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
3842 }
3843 if( ((ucWriteMask>>2) & 0x1) != 0)
3844 {
3845 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
3846 }
3847 else
3848 {
3849 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
3850 }
3851 if( ((ucWriteMask>>3) & 0x1) != 0)
3852 {
3853 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
3854 }
3855 else
3856 {
3857 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
3858 }
3859 }
3860 else
3861 {
3862 // This should only be used if all components for all registers have been written
3863 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
3864 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
3865 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
3866 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
3867 }
3868
3869 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
3870
3871 return GL_TRUE;
3872 }
3873
3874 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
3875 {
3876 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
3877 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
3878
3879 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
3880
3881 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3882
3883 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3884 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3885 pAsm->D.dst.reg = pAsm->depth_export_register_number;
3886
3887 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
3888
3889 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3890 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3891 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
3892
3893 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
3894
3895 noneg_PVSSRC(&(pAsm->S[0].src));
3896
3897 if( GL_FALSE == next_ins(pAsm) )
3898 {
3899 return GL_FALSE;
3900 }
3901
3902 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
3903
3904 return GL_TRUE;
3905 }
3906
3907 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
3908 GLbitfield OutputsWritten)
3909 {
3910 unsigned int unBit;
3911
3912 if(pR700AsmCode->depth_export_register_number >= 0)
3913 {
3914 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
3915 {
3916 return GL_FALSE;
3917 }
3918 }
3919
3920 unBit = 1 << FRAG_RESULT_COLOR;
3921 if(OutputsWritten & unBit)
3922 {
3923 if( GL_FALSE == Process_Export(pR700AsmCode,
3924 SQ_EXPORT_PIXEL,
3925 0,
3926 1,
3927 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
3928 GL_FALSE) )
3929 {
3930 return GL_FALSE;
3931 }
3932 }
3933 unBit = 1 << FRAG_RESULT_DEPTH;
3934 if(OutputsWritten & unBit)
3935 {
3936 if( GL_FALSE == Process_Export(pR700AsmCode,
3937 SQ_EXPORT_PIXEL,
3938 0,
3939 1,
3940 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
3941 GL_TRUE))
3942 {
3943 return GL_FALSE;
3944 }
3945 }
3946
3947 if(pR700AsmCode->cf_last_export_ptr != NULL)
3948 {
3949 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
3950 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
3951 }
3952
3953 return GL_TRUE;
3954 }
3955
3956 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
3957 GLbitfield OutputsWritten)
3958 {
3959 unsigned int unBit;
3960 unsigned int i;
3961
3962 GLuint export_starting_index = 0;
3963 GLuint export_count = pR700AsmCode->number_of_exports;
3964
3965 unBit = 1 << VERT_RESULT_HPOS;
3966 if(OutputsWritten & unBit)
3967 {
3968 if( GL_FALSE == Process_Export(pR700AsmCode,
3969 SQ_EXPORT_POS,
3970 export_starting_index,
3971 1,
3972 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
3973 GL_FALSE) )
3974 {
3975 return GL_FALSE;
3976 }
3977
3978 export_count--;
3979
3980 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
3981 }
3982
3983 pR700AsmCode->number_of_exports = export_count;
3984
3985 unBit = 1 << VERT_RESULT_COL0;
3986 if(OutputsWritten & unBit)
3987 {
3988 if( GL_FALSE == Process_Export(pR700AsmCode,
3989 SQ_EXPORT_PARAM,
3990 export_starting_index,
3991 1,
3992 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
3993 GL_FALSE) )
3994 {
3995 return GL_FALSE;
3996 }
3997
3998 export_starting_index++;
3999 }
4000
4001 unBit = 1 << VERT_RESULT_COL1;
4002 if(OutputsWritten & unBit)
4003 {
4004 if( GL_FALSE == Process_Export(pR700AsmCode,
4005 SQ_EXPORT_PARAM,
4006 export_starting_index,
4007 1,
4008 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
4009 GL_FALSE) )
4010 {
4011 return GL_FALSE;
4012 }
4013
4014 export_starting_index++;
4015 }
4016
4017 unBit = 1 << VERT_RESULT_FOGC;
4018 if(OutputsWritten & unBit)
4019 {
4020 if( GL_FALSE == Process_Export(pR700AsmCode,
4021 SQ_EXPORT_PARAM,
4022 export_starting_index,
4023 1,
4024 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
4025 GL_FALSE) )
4026 {
4027 return GL_FALSE;
4028 }
4029
4030 export_starting_index++;
4031 }
4032
4033 for(i=0; i<8; i++)
4034 {
4035 unBit = 1 << (VERT_RESULT_TEX0 + i);
4036 if(OutputsWritten & unBit)
4037 {
4038 if( GL_FALSE == Process_Export(pR700AsmCode,
4039 SQ_EXPORT_PARAM,
4040 export_starting_index,
4041 1,
4042 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
4043 GL_FALSE) )
4044 {
4045 return GL_FALSE;
4046 }
4047
4048 export_starting_index++;
4049 }
4050 }
4051
4052 // At least one param should be exported
4053 if (export_count)
4054 {
4055 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4056 }
4057 else
4058 {
4059 if( GL_FALSE == Process_Export(pR700AsmCode,
4060 SQ_EXPORT_PARAM,
4061 0,
4062 1,
4063 pR700AsmCode->starting_export_register_number,
4064 GL_FALSE) )
4065 {
4066 return GL_FALSE;
4067 }
4068
4069 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
4070 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
4071 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
4072 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
4073 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4074 }
4075
4076 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4077
4078 return GL_TRUE;
4079 }
4080
4081 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
4082 {
4083 FREE(pR700AsmCode->pucOutMask);
4084 FREE(pR700AsmCode->pInstDeps);
4085 return GL_TRUE;
4086 }
4087