efeccb25f1e4c08c98832d05cc410d7c3fe0e04c
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 BITS addrmode_PVSDST(PVSDST * pPVSDST)
42 {
43 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
44 }
45
46 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
47 {
48 pPVSDST->addrmode0 = addrmode & 1;
49 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
50 }
51
52 void nomask_PVSDST(PVSDST * pPVSDST)
53 {
54 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
55 }
56
57 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
58 {
59 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
60 }
61
62 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
63 {
64 pPVSSRC->addrmode0 = addrmode & 1;
65 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
66 }
67
68
69 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
70 {
71 pPVSSRC->swizzlex =
72 pPVSSRC->swizzley =
73 pPVSSRC->swizzlez =
74 pPVSSRC->swizzlew = swz;
75 }
76
77 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
78 {
79 pPVSSRC->swizzlex = SQ_SEL_X;
80 pPVSSRC->swizzley = SQ_SEL_Y;
81 pPVSSRC->swizzlez = SQ_SEL_Z;
82 pPVSSRC->swizzlew = SQ_SEL_W;
83 }
84
85 void
86 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
87 {
88 switch (x)
89 {
90 case SQ_SEL_X: x = pPVSSRC->swizzlex;
91 break;
92 case SQ_SEL_Y: x = pPVSSRC->swizzley;
93 break;
94 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
95 break;
96 case SQ_SEL_W: x = pPVSSRC->swizzlew;
97 break;
98 default:;
99 }
100
101 switch (y)
102 {
103 case SQ_SEL_X: y = pPVSSRC->swizzlex;
104 break;
105 case SQ_SEL_Y: y = pPVSSRC->swizzley;
106 break;
107 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
108 break;
109 case SQ_SEL_W: y = pPVSSRC->swizzlew;
110 break;
111 default:;
112 }
113
114 switch (z)
115 {
116 case SQ_SEL_X: z = pPVSSRC->swizzlex;
117 break;
118 case SQ_SEL_Y: z = pPVSSRC->swizzley;
119 break;
120 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
121 break;
122 case SQ_SEL_W: z = pPVSSRC->swizzlew;
123 break;
124 default:;
125 }
126
127 switch (w)
128 {
129 case SQ_SEL_X: w = pPVSSRC->swizzlex;
130 break;
131 case SQ_SEL_Y: w = pPVSSRC->swizzley;
132 break;
133 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
134 break;
135 case SQ_SEL_W: w = pPVSSRC->swizzlew;
136 break;
137 default:;
138 }
139
140 pPVSSRC->swizzlex = x;
141 pPVSSRC->swizzley = y;
142 pPVSSRC->swizzlez = z;
143 pPVSSRC->swizzlew = w;
144 }
145
146 void neg_PVSSRC(PVSSRC* pPVSSRC)
147 {
148 pPVSSRC->negx = 1;
149 pPVSSRC->negy = 1;
150 pPVSSRC->negz = 1;
151 pPVSSRC->negw = 1;
152 }
153
154 void noneg_PVSSRC(PVSSRC* pPVSSRC)
155 {
156 pPVSSRC->negx = 0;
157 pPVSSRC->negy = 0;
158 pPVSSRC->negz = 0;
159 pPVSSRC->negw = 0;
160 }
161
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
164 {
165 pPVSSRC->negx = !pPVSSRC->negx;
166 pPVSSRC->negy = !pPVSSRC->negy;
167 pPVSSRC->negz = !pPVSSRC->negz;
168 pPVSSRC->negw = !pPVSSRC->negw;
169 }
170
171 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
172 {
173 switch (c)
174 {
175 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
176 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
177 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
178 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
179 default:;
180 }
181 }
182
183 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
184 {
185 switch (c)
186 {
187 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
188 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
189 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
190 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
191 default:;
192 }
193 }
194
195 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
196 {
197 return (pOutVTXFmt0->point_size |
198 pOutVTXFmt0->edge_flag |
199 pOutVTXFmt0->rta_index |
200 pOutVTXFmt0->kill_flag |
201 pOutVTXFmt0->viewport_index);
202 }
203
204 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
205 {
206 return (pFPOutFmt->depth |
207 pFPOutFmt->stencil_ref |
208 pFPOutFmt->mask |
209 pFPOutFmt->coverage_to_mask);
210 }
211
212 GLboolean is_reduction_opcode(PVSDWORD* dest)
213 {
214 if (dest->dst.op3 == 0)
215 {
216 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE) )
217 {
218 return GL_TRUE;
219 }
220 }
221 return GL_FALSE;
222 }
223
224 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
225 {
226 GLuint format = FMT_INVALID;
227 GLuint uiElemSize = 0;
228
229 switch (eType)
230 {
231 case GL_BYTE:
232 case GL_UNSIGNED_BYTE:
233 uiElemSize = 1;
234 switch(nChannels)
235 {
236 case 1:
237 format = FMT_8; break;
238 case 2:
239 format = FMT_8_8; break;
240 case 3:
241 format = FMT_8_8_8; break;
242 case 4:
243 format = FMT_8_8_8_8; break;
244 default:
245 break;
246 }
247 break;
248
249 case GL_UNSIGNED_SHORT:
250 case GL_SHORT:
251 uiElemSize = 2;
252 switch(nChannels)
253 {
254 case 1:
255 format = FMT_16; break;
256 case 2:
257 format = FMT_16_16; break;
258 case 3:
259 format = FMT_16_16_16; break;
260 case 4:
261 format = FMT_16_16_16_16; break;
262 default:
263 break;
264 }
265 break;
266
267 case GL_UNSIGNED_INT:
268 case GL_INT:
269 uiElemSize = 4;
270 switch(nChannels)
271 {
272 case 1:
273 format = FMT_32; break;
274 case 2:
275 format = FMT_32_32; break;
276 case 3:
277 format = FMT_32_32_32; break;
278 case 4:
279 format = FMT_32_32_32_32; break;
280 default:
281 break;
282 }
283 break;
284
285 case GL_FLOAT:
286 uiElemSize = 4;
287 switch(nChannels)
288 {
289 case 1:
290 format = FMT_32_FLOAT; break;
291 case 2:
292 format = FMT_32_32_FLOAT; break;
293 case 3:
294 format = FMT_32_32_32_FLOAT; break;
295 case 4:
296 format = FMT_32_32_32_32_FLOAT; break;
297 default:
298 break;
299 }
300 break;
301 case GL_DOUBLE:
302 uiElemSize = 8;
303 switch(nChannels)
304 {
305 case 1:
306 format = FMT_32_FLOAT; break;
307 case 2:
308 format = FMT_32_32_FLOAT; break;
309 case 3:
310 format = FMT_32_32_32_FLOAT; break;
311 case 4:
312 format = FMT_32_32_32_32_FLOAT; break;
313 default:
314 break;
315 }
316 break;
317 default:
318 ;
319 //GL_ASSERT_NO_CASE();
320 }
321
322 if(NULL != pClient_size)
323 {
324 *pClient_size = uiElemSize * nChannels;
325 }
326
327 return(format);
328 }
329
330 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
331 {
332 if(pAsm->D.dst.op3)
333 {
334 return 3;
335 }
336
337 switch (pAsm->D.dst.opcode)
338 {
339 case SQ_OP2_INST_ADD:
340 case SQ_OP2_INST_MUL:
341 case SQ_OP2_INST_MAX:
342 case SQ_OP2_INST_MIN:
343 //case SQ_OP2_INST_MAX_DX10:
344 //case SQ_OP2_INST_MIN_DX10:
345 case SQ_OP2_INST_SETGT:
346 case SQ_OP2_INST_SETGE:
347 case SQ_OP2_INST_PRED_SETE:
348 case SQ_OP2_INST_PRED_SETGT:
349 case SQ_OP2_INST_PRED_SETGE:
350 case SQ_OP2_INST_PRED_SETNE:
351 case SQ_OP2_INST_DOT4:
352 case SQ_OP2_INST_DOT4_IEEE:
353 return 2;
354
355 case SQ_OP2_INST_MOV:
356 case SQ_OP2_INST_FRACT:
357 case SQ_OP2_INST_FLOOR:
358 case SQ_OP2_INST_KILLGT:
359 case SQ_OP2_INST_EXP_IEEE:
360 case SQ_OP2_INST_LOG_CLAMPED:
361 case SQ_OP2_INST_LOG_IEEE:
362 case SQ_OP2_INST_RECIP_IEEE:
363 case SQ_OP2_INST_RECIPSQRT_IEEE:
364 case SQ_OP2_INST_FLT_TO_INT:
365 case SQ_OP2_INST_SIN:
366 case SQ_OP2_INST_COS:
367 return 1;
368
369 default: radeon_error(
370 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
371 };
372
373 return 3;
374 }
375
376 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
377 {
378 GLuint i;
379
380 Init_R700_Shader(pShader);
381 pAsm->pR700Shader = pShader;
382 pAsm->currentShaderType = spt;
383
384 pAsm->cf_last_export_ptr = NULL;
385
386 pAsm->cf_current_export_clause_ptr = NULL;
387 pAsm->cf_current_alu_clause_ptr = NULL;
388 pAsm->cf_current_tex_clause_ptr = NULL;
389 pAsm->cf_current_vtx_clause_ptr = NULL;
390 pAsm->cf_current_cf_clause_ptr = NULL;
391
392 // No clause has been created yet
393 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
394
395 pAsm->number_of_colorandz_exports = 0;
396 pAsm->number_of_exports = 0;
397 pAsm->number_of_export_opcodes = 0;
398
399
400 pAsm->D.bits = 0;
401 pAsm->S[0].bits = 0;
402 pAsm->S[1].bits = 0;
403 pAsm->S[2].bits = 0;
404
405 pAsm->uLastPosUpdate = 0;
406
407 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
408
409 pAsm->uIIns = 0;
410 pAsm->uOIns = 0;
411 pAsm->number_used_registers = 0;
412 pAsm->uUsedConsts = 256;
413
414
415 // Fragment programs
416 pAsm->uBoolConsts = 0;
417 pAsm->uIntConsts = 0;
418 pAsm->uInsts = 0;
419 pAsm->uConsts = 0;
420
421 pAsm->FCSP = 0;
422 pAsm->fc_stack[0].type = FC_NONE;
423
424 pAsm->branch_depth = 0;
425 pAsm->max_branch_depth = 0;
426
427 pAsm->aArgSubst[0] =
428 pAsm->aArgSubst[1] =
429 pAsm->aArgSubst[2] =
430 pAsm->aArgSubst[3] = (-1);
431
432 pAsm->uOutputs = 0;
433
434 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
435 {
436 pAsm->color_export_register_number[i] = (-1);
437 }
438
439
440 pAsm->depth_export_register_number = (-1);
441 pAsm->stencil_export_register_number = (-1);
442 pAsm->coverage_to_mask_export_register_number = (-1);
443 pAsm->mask_export_register_number = (-1);
444
445 pAsm->starting_export_register_number = 0;
446 pAsm->starting_vfetch_register_number = 0;
447 pAsm->starting_temp_register_number = 0;
448 pAsm->uFirstHelpReg = 0;
449
450
451 pAsm->input_position_is_used = GL_FALSE;
452 pAsm->input_normal_is_used = GL_FALSE;
453
454
455 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
456 {
457 pAsm->input_color_is_used[ i ] = GL_FALSE;
458 }
459
460 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
461 {
462 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
463 }
464
465 for (i=0; i<VERT_ATTRIB_MAX; i++)
466 {
467 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
468 }
469
470 pAsm->number_of_inputs = 0;
471
472 return 0;
473 }
474
475 GLboolean IsTex(gl_inst_opcode Opcode)
476 {
477 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
478 {
479 return GL_TRUE;
480 }
481 return GL_FALSE;
482 }
483
484 GLboolean IsAlu(gl_inst_opcode Opcode)
485 {
486 //TODO : more for fc and ex for higher spec.
487 if( IsTex(Opcode) )
488 {
489 return GL_FALSE;
490 }
491 return GL_TRUE;
492 }
493
494 int check_current_clause(r700_AssemblerBase* pAsm,
495 CF_CLAUSE_TYPE new_clause_type)
496 {
497 if (pAsm->cf_current_clause_type != new_clause_type)
498 { //Close last open clause
499 switch (pAsm->cf_current_clause_type)
500 {
501 case CF_ALU_CLAUSE:
502 if ( pAsm->cf_current_alu_clause_ptr != NULL)
503 {
504 pAsm->cf_current_alu_clause_ptr = NULL;
505 }
506 break;
507 case CF_VTX_CLAUSE:
508 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
509 {
510 pAsm->cf_current_vtx_clause_ptr = NULL;
511 }
512 break;
513 case CF_TEX_CLAUSE:
514 if ( pAsm->cf_current_tex_clause_ptr != NULL)
515 {
516 pAsm->cf_current_tex_clause_ptr = NULL;
517 }
518 break;
519 case CF_EXPORT_CLAUSE:
520 if ( pAsm->cf_current_export_clause_ptr != NULL)
521 {
522 pAsm->cf_current_export_clause_ptr = NULL;
523 }
524 break;
525 case CF_OTHER_CLAUSE:
526 if ( pAsm->cf_current_cf_clause_ptr != NULL)
527 {
528 pAsm->cf_current_cf_clause_ptr = NULL;
529 }
530 break;
531 case CF_EMPTY_CLAUSE:
532 break;
533 default:
534 radeon_error(
535 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
536 return GL_FALSE;
537 }
538
539 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
540
541 // Create new clause
542 switch (new_clause_type)
543 {
544 case CF_ALU_CLAUSE:
545 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
546 break;
547 case CF_VTX_CLAUSE:
548 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
549 break;
550 case CF_TEX_CLAUSE:
551 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
552 break;
553 case CF_EXPORT_CLAUSE:
554 {
555 R700ControlFlowSXClause* pR700ControlFlowSXClause
556 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
557
558 // Add new export instruction to control flow program
559 if (pR700ControlFlowSXClause != 0)
560 {
561 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
562 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
563 AddCFInstruction( pAsm->pR700Shader,
564 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
565 }
566 else
567 {
568 radeon_error(
569 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
570 return GL_FALSE;
571 }
572 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
573 }
574 break;
575 case CF_EMPTY_CLAUSE:
576 break;
577 case CF_OTHER_CLAUSE:
578 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
579 break;
580 default:
581 radeon_error(
582 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
583 return GL_FALSE;
584 }
585 }
586
587 return GL_TRUE;
588 }
589
590 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
591 R700VertexInstruction* vertex_instruction_ptr)
592 {
593 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
594 {
595 return GL_FALSE;
596 }
597
598 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
599 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
600 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
601 ) )
602 {
603 // Create new Vfetch control flow instruction for this new clause
604 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
605
606 if (pAsm->cf_current_vtx_clause_ptr != NULL)
607 {
608 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
609 AddCFInstruction( pAsm->pR700Shader,
610 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
611 }
612 else
613 {
614 radeon_error("Could not allocate a new VFetch CF instruction.\n");
615 return GL_FALSE;
616 }
617
618 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
619 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
620 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
621 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
622 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
623 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
624 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
625 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
626 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
627
628 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
629 }
630 else
631 {
632 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
633 }
634
635 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
636
637 return GL_TRUE;
638 }
639
640 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
641 R700TextureInstruction* tex_instruction_ptr)
642 {
643 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
644 {
645 return GL_FALSE;
646 }
647
648 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
649 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
650 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
651 ) )
652 {
653 // new tex cf instruction for this new clause
654 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
655
656 if (pAsm->cf_current_tex_clause_ptr != NULL)
657 {
658 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
659 AddCFInstruction( pAsm->pR700Shader,
660 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
661 }
662 else
663 {
664 radeon_error("Could not allocate a new TEX CF instruction.\n");
665 return GL_FALSE;
666 }
667
668 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
669 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
670 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
671
672 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
673 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
674 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
675 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
676 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
677 }
678 else
679 {
680 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
681 }
682
683 // If this clause constains any TEX instruction that is dependent on a previous instruction,
684 // set the barrier bit
685 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
686 {
687 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
688 }
689
690 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
691 {
692 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
693 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
694 }
695
696 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
697
698 return GL_TRUE;
699 }
700
701 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
702 GLuint gl_client_id,
703 GLuint destination_register,
704 GLuint number_of_elements,
705 GLenum dataElementType,
706 VTX_FETCH_METHOD* pFetchMethod)
707 {
708 GLuint client_size_inbyte;
709 GLuint data_format;
710 GLuint mega_fetch_count;
711 GLuint is_mega_fetch_flag;
712
713 R700VertexGenericFetch* vfetch_instruction_ptr;
714 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
715
716 if (assembled_vfetch_instruction_ptr == NULL)
717 {
718 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
719 if (vfetch_instruction_ptr == NULL)
720 {
721 return GL_FALSE;
722 }
723 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
724 }
725 else
726 {
727 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
728 }
729
730 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
731
732 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
733 {
734 //TODO : mini fetch
735 }
736 else
737 {
738 mega_fetch_count = MEGA_FETCH_BYTES - 1;
739 is_mega_fetch_flag = 0x1;
740 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
741 }
742
743 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
744 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
745 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
746
747 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
748 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
749 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
750 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
751 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
752
753 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
754 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
755 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
756 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
757
758 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
759
760 // Destination register
761 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
762 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
763
764 vfetch_instruction_ptr->m_Word2.f.offset = 0;
765 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
766
767 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
768
769 if (assembled_vfetch_instruction_ptr == NULL)
770 {
771 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
772 {
773 return GL_FALSE;
774 }
775
776 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
777 {
778 return GL_FALSE;
779 }
780 else
781 {
782 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
783 }
784 }
785
786 return GL_TRUE;
787 }
788
789 GLuint gethelpr(r700_AssemblerBase* pAsm)
790 {
791 GLuint r = pAsm->uHelpReg;
792 pAsm->uHelpReg++;
793 if (pAsm->uHelpReg > pAsm->number_used_registers)
794 {
795 pAsm->number_used_registers = pAsm->uHelpReg;
796 }
797 return r;
798 }
799 void resethelpr(r700_AssemblerBase* pAsm)
800 {
801 pAsm->uHelpReg = pAsm->uFirstHelpReg;
802 }
803
804 void checkop_init(r700_AssemblerBase* pAsm)
805 {
806 resethelpr(pAsm);
807 pAsm->aArgSubst[0] =
808 pAsm->aArgSubst[1] =
809 pAsm->aArgSubst[2] =
810 pAsm->aArgSubst[3] = -1;
811 }
812
813 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
814 {
815 GLuint tmp = gethelpr(pAsm);
816
817 //mov src to temp helper gpr.
818 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
819
820 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
821
822 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
823 pAsm->D.dst.reg = tmp;
824
825 nomask_PVSDST(&(pAsm->D.dst));
826
827 if( GL_FALSE == assemble_src(pAsm, src, 0) )
828 {
829 return GL_FALSE;
830 }
831
832 noswizzle_PVSSRC(&(pAsm->S[0].src));
833 noneg_PVSSRC(&(pAsm->S[0].src));
834
835 if( GL_FALSE == next_ins(pAsm) )
836 {
837 return GL_FALSE;
838 }
839
840 pAsm->aArgSubst[1 + src] = tmp;
841
842 return GL_TRUE;
843 }
844
845 GLboolean checkop1(r700_AssemblerBase* pAsm)
846 {
847 checkop_init(pAsm);
848 return GL_TRUE;
849 }
850
851 GLboolean checkop2(r700_AssemblerBase* pAsm)
852 {
853 GLboolean bSrcConst[2];
854 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
855
856 checkop_init(pAsm);
857
858 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
859 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
860 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
861 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
862 {
863 bSrcConst[0] = GL_TRUE;
864 }
865 else
866 {
867 bSrcConst[0] = GL_FALSE;
868 }
869 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
870 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
871 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
872 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
873 {
874 bSrcConst[1] = GL_TRUE;
875 }
876 else
877 {
878 bSrcConst[1] = GL_FALSE;
879 }
880
881 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
882 {
883 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
884 {
885 if( GL_FALSE == mov_temp(pAsm, 1) )
886 {
887 return GL_FALSE;
888 }
889 }
890 }
891
892 return GL_TRUE;
893 }
894
895 GLboolean checkop3(r700_AssemblerBase* pAsm)
896 {
897 GLboolean bSrcConst[3];
898 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
899
900 checkop_init(pAsm);
901
902 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
903 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
904 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
905 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
906 {
907 bSrcConst[0] = GL_TRUE;
908 }
909 else
910 {
911 bSrcConst[0] = GL_FALSE;
912 }
913 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
914 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
915 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
916 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
917 {
918 bSrcConst[1] = GL_TRUE;
919 }
920 else
921 {
922 bSrcConst[1] = GL_FALSE;
923 }
924 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
925 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
926 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
927 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
928 {
929 bSrcConst[2] = GL_TRUE;
930 }
931 else
932 {
933 bSrcConst[2] = GL_FALSE;
934 }
935
936 if( (GL_TRUE == bSrcConst[0]) &&
937 (GL_TRUE == bSrcConst[1]) &&
938 (GL_TRUE == bSrcConst[2]) )
939 {
940 if( GL_FALSE == mov_temp(pAsm, 1) )
941 {
942 return GL_FALSE;
943 }
944 if( GL_FALSE == mov_temp(pAsm, 2) )
945 {
946 return GL_FALSE;
947 }
948
949 return GL_TRUE;
950 }
951 else if( (GL_TRUE == bSrcConst[0]) &&
952 (GL_TRUE == bSrcConst[1]) )
953 {
954 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
955 {
956 if( GL_FALSE == mov_temp(pAsm, 1) )
957 {
958 return 1;
959 }
960 }
961
962 return GL_TRUE;
963 }
964 else if ( (GL_TRUE == bSrcConst[0]) &&
965 (GL_TRUE == bSrcConst[2]) )
966 {
967 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
968 {
969 if( GL_FALSE == mov_temp(pAsm, 2) )
970 {
971 return GL_FALSE;
972 }
973 }
974
975 return GL_TRUE;
976 }
977 else if( (GL_TRUE == bSrcConst[1]) &&
978 (GL_TRUE == bSrcConst[2]) )
979 {
980 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
981 {
982 if( GL_FALSE == mov_temp(pAsm, 2) )
983 {
984 return GL_FALSE;
985 }
986 }
987
988 return GL_TRUE;
989 }
990
991 return GL_TRUE;
992 }
993
994 GLboolean assemble_src(r700_AssemblerBase *pAsm,
995 int src,
996 int fld)
997 {
998 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
999
1000 if (fld == -1)
1001 {
1002 fld = src;
1003 }
1004
1005 if(pAsm->aArgSubst[1+src] >= 0)
1006 {
1007 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1008 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1009 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1010 }
1011 else
1012 {
1013 switch (pILInst->SrcReg[src].File)
1014 {
1015 case PROGRAM_TEMPORARY:
1016 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1017 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1018 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1019 break;
1020 case PROGRAM_CONSTANT:
1021 case PROGRAM_LOCAL_PARAM:
1022 case PROGRAM_ENV_PARAM:
1023 case PROGRAM_STATE_VAR:
1024 if (1 == pILInst->SrcReg[src].RelAddr)
1025 {
1026 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1027 }
1028 else
1029 {
1030 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1031 }
1032
1033 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1034 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1035 break;
1036 case PROGRAM_INPUT:
1037 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1038 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1039 switch (pAsm->currentShaderType)
1040 {
1041 case SPT_FP:
1042 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1043 break;
1044 case SPT_VP:
1045 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1046 break;
1047 }
1048 break;
1049 default:
1050 radeon_error("Invalid source argument type\n");
1051 return GL_FALSE;
1052 }
1053 }
1054
1055 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1056 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1057 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1058 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1059
1060 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1061 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1062 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1063 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1064
1065 return GL_TRUE;
1066 }
1067
1068 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1069 {
1070 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1071 switch (pILInst->DstReg.File)
1072 {
1073 case PROGRAM_TEMPORARY:
1074 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1075 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1076 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1077 break;
1078 case PROGRAM_ADDRESS:
1079 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1080 pAsm->D.dst.rtype = DST_REG_A0;
1081 pAsm->D.dst.reg = 0;
1082 break;
1083 case PROGRAM_OUTPUT:
1084 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1085 pAsm->D.dst.rtype = DST_REG_OUT;
1086 switch (pAsm->currentShaderType)
1087 {
1088 case SPT_FP:
1089 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1090 break;
1091 case SPT_VP:
1092 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1093 break;
1094 }
1095 break;
1096 default:
1097 radeon_error("Invalid destination output argument type\n");
1098 return GL_FALSE;
1099 }
1100
1101 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1102 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1103 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1104 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1105
1106 return GL_TRUE;
1107 }
1108
1109 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1110 {
1111 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1112
1113 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1114 {
1115 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1116 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1117
1118 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1119 }
1120 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1121 {
1122 pAsm->D.dst.rtype = DST_REG_OUT;
1123 switch (pAsm->currentShaderType)
1124 {
1125 case SPT_FP:
1126 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1127 break;
1128 case SPT_VP:
1129 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1130 break;
1131 }
1132
1133 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1134 }
1135 else
1136 {
1137 radeon_error("Invalid destination output argument type\n");
1138 return GL_FALSE;
1139 }
1140
1141 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1142 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1143 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1144 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1145
1146 return GL_TRUE;
1147 }
1148
1149 GLboolean tex_src(r700_AssemblerBase *pAsm)
1150 {
1151 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1152
1153 GLboolean bValidTexCoord = GL_FALSE;
1154
1155 switch (pILInst->SrcReg[0].File) {
1156 case PROGRAM_CONSTANT:
1157 case PROGRAM_LOCAL_PARAM:
1158 case PROGRAM_ENV_PARAM:
1159 case PROGRAM_STATE_VAR:
1160 bValidTexCoord = GL_TRUE;
1161 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1162 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1163 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1164 break;
1165 case PROGRAM_TEMPORARY:
1166 bValidTexCoord = GL_TRUE;
1167 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1168 pAsm->starting_temp_register_number;
1169 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1170 break;
1171 case PROGRAM_INPUT:
1172 switch (pILInst->SrcReg[0].Index)
1173 {
1174 case FRAG_ATTRIB_COL0:
1175 case FRAG_ATTRIB_COL1:
1176 case FRAG_ATTRIB_TEX0:
1177 case FRAG_ATTRIB_TEX1:
1178 case FRAG_ATTRIB_TEX2:
1179 case FRAG_ATTRIB_TEX3:
1180 case FRAG_ATTRIB_TEX4:
1181 case FRAG_ATTRIB_TEX5:
1182 case FRAG_ATTRIB_TEX6:
1183 case FRAG_ATTRIB_TEX7:
1184 bValidTexCoord = GL_TRUE;
1185 pAsm->S[0].src.reg =
1186 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1187 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1188 break;
1189 }
1190 break;
1191 }
1192
1193 if(GL_TRUE == bValidTexCoord)
1194 {
1195 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1196 }
1197 else
1198 {
1199 radeon_error("Invalid source texcoord for TEX instruction\n");
1200 return GL_FALSE;
1201 }
1202
1203 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1204 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1205 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1206 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1207
1208 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1209 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1210 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1211 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1212
1213 return GL_TRUE;
1214 }
1215
1216 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1217 {
1218 PVSSRC * texture_coordinate_source;
1219 PVSSRC * texture_unit_source;
1220
1221 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1222 if (tex_instruction_ptr == NULL)
1223 {
1224 return GL_FALSE;
1225 }
1226 Init_R700TextureInstruction(tex_instruction_ptr);
1227
1228 texture_coordinate_source = &(pAsm->S[0].src);
1229 texture_unit_source = &(pAsm->S[1].src);
1230
1231 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1232 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1233 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1234
1235 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1236
1237 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1238 if (normalized) {
1239 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1240 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1241 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1242 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1243 } else {
1244 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1245 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1246 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1247 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1248 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1249 }
1250
1251 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1252 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1253 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1254
1255 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1256
1257 // dst
1258 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1259 (pAsm->D.dst.rtype == DST_REG_OUT) )
1260 {
1261 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1262 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1263
1264 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1265 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1266
1267 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1268 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1269 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1270 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1271
1272
1273 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1274 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1275 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1276 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1277 }
1278 else
1279 {
1280 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1281 return GL_FALSE;
1282 }
1283
1284 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1285 {
1286 return GL_FALSE;
1287 }
1288
1289 return GL_TRUE;
1290 }
1291
1292 void initialize(r700_AssemblerBase *pAsm)
1293 {
1294 GLuint cycle, component;
1295
1296 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1297 {
1298 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1299 {
1300 pAsm->hw_gpr[cycle][component] = (-1);
1301 }
1302 }
1303 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1304 {
1305 pAsm->hw_cfile_addr[component] = (-1);
1306 pAsm->hw_cfile_chan[component] = (-1);
1307 }
1308 }
1309
1310 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1311 int source_index,
1312 PVSSRC* pSource,
1313 BITS scalar_channel_index)
1314 {
1315 BITS src_sel;
1316 BITS src_rel;
1317 BITS src_chan;
1318 BITS src_neg;
1319
1320 //--------------------------------------------------------------------------
1321 // Source for operands src0, src1.
1322 // Values [0,127] correspond to GPR[0..127].
1323 // Values [256,511] correspond to cfile constants c[0..255].
1324
1325 //--------------------------------------------------------------------------
1326 // Other special values are shown in the list below.
1327
1328 // 248 SQ_ALU_SRC_0: special constant 0.0.
1329 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1330
1331 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1332 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1333
1334 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1335 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1336
1337 // 254 SQ_ALU_SRC_PV: previous vector result.
1338 // 255 SQ_ALU_SRC_PS: previous scalar result.
1339 //--------------------------------------------------------------------------
1340
1341 BITS channel_swizzle;
1342 switch (scalar_channel_index)
1343 {
1344 case 0: channel_swizzle = pSource->swizzlex; break;
1345 case 1: channel_swizzle = pSource->swizzley; break;
1346 case 2: channel_swizzle = pSource->swizzlez; break;
1347 case 3: channel_swizzle = pSource->swizzlew; break;
1348 default: channel_swizzle = SQ_SEL_MASK; break;
1349 }
1350
1351 if(channel_swizzle == SQ_SEL_0)
1352 {
1353 src_sel = SQ_ALU_SRC_0;
1354 }
1355 else if (channel_swizzle == SQ_SEL_1)
1356 {
1357 src_sel = SQ_ALU_SRC_1;
1358 }
1359 else
1360 {
1361 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1362 (pSource->rtype == SRC_REG_INPUT)
1363 )
1364 {
1365 src_sel = pSource->reg;
1366 }
1367 else if (pSource->rtype == SRC_REG_CONSTANT)
1368 {
1369 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1370 }
1371 else
1372 {
1373 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1374 source_index, pSource->rtype);
1375 return GL_FALSE;
1376 }
1377 }
1378
1379 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1380 {
1381 src_rel = SQ_ABSOLUTE;
1382 }
1383 else
1384 {
1385 src_rel = SQ_RELATIVE;
1386 }
1387
1388 switch (channel_swizzle)
1389 {
1390 case SQ_SEL_X:
1391 src_chan = SQ_CHAN_X;
1392 break;
1393 case SQ_SEL_Y:
1394 src_chan = SQ_CHAN_Y;
1395 break;
1396 case SQ_SEL_Z:
1397 src_chan = SQ_CHAN_Z;
1398 break;
1399 case SQ_SEL_W:
1400 src_chan = SQ_CHAN_W;
1401 break;
1402 case SQ_SEL_0:
1403 case SQ_SEL_1:
1404 // Does not matter since src_sel controls
1405 src_chan = SQ_CHAN_X;
1406 break;
1407 default:
1408 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1409 return GL_FALSE;
1410 break;
1411 }
1412
1413 switch (scalar_channel_index)
1414 {
1415 case 0: src_neg = pSource->negx; break;
1416 case 1: src_neg = pSource->negy; break;
1417 case 2: src_neg = pSource->negz; break;
1418 case 3: src_neg = pSource->negw; break;
1419 default: src_neg = 0; break;
1420 }
1421
1422 switch (source_index)
1423 {
1424 case 0:
1425 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1426 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1427 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1428 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1429 break;
1430 case 1:
1431 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1432 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1433 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1434 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1435 break;
1436 case 2:
1437 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1438 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1439 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1440 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1441 break;
1442 default:
1443 radeon_error("Only three sources allowed in ALU opcodes.\n");
1444 return GL_FALSE;
1445 break;
1446 }
1447
1448 return GL_TRUE;
1449 }
1450
1451 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1452 R700ALUInstruction* alu_instruction_ptr,
1453 GLuint contiguous_slots_needed)
1454 {
1455 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1456 {
1457 return GL_FALSE;
1458 }
1459
1460 if ( pAsm->cf_current_alu_clause_ptr == NULL ||
1461 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1462 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1463 ) )
1464 {
1465
1466 //new cf inst for this clause
1467 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1468
1469 // link the new cf to cf segment
1470 if(NULL != pAsm->cf_current_alu_clause_ptr)
1471 {
1472 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1473 AddCFInstruction( pAsm->pR700Shader,
1474 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1475 }
1476 else
1477 {
1478 radeon_error("Could not allocate a new ALU CF instruction.\n");
1479 return GL_FALSE;
1480 }
1481
1482 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1483 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1484 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1485
1486 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1487 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1488 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1489
1490 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1491 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1492 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1493
1494 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1495
1496 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1497 }
1498 else
1499 {
1500 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1501 }
1502
1503 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1504 // set the whole_quad_mode for this clause
1505 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1506 {
1507 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1508 }
1509
1510 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1511 {
1512 alu_instruction_ptr->m_Word0.f.last = 1;
1513 }
1514
1515 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1516 {
1517 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1518 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1519 }
1520
1521 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1522
1523 return GL_TRUE;
1524 }
1525
1526 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1527 int source_index,
1528 BITS* psrc_sel,
1529 BITS* psrc_rel,
1530 BITS* psrc_chan,
1531 BITS* psrc_neg)
1532 {
1533 switch (source_index)
1534 {
1535 case 0:
1536 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1537 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1538 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1539 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1540 break;
1541
1542 case 1:
1543 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1544 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1545 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1546 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1547 break;
1548
1549 case 2:
1550 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1551 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1552 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1553 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1554 break;
1555 }
1556 }
1557
1558 int is_cfile(BITS sel)
1559 {
1560 if (sel > 255 && sel < 512)
1561 {
1562 return 1;
1563 }
1564 return 0;
1565 }
1566
1567 int is_const(BITS sel)
1568 {
1569 if (is_cfile(sel))
1570 {
1571 return 1;
1572 }
1573 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1574 {
1575 return 1;
1576 }
1577 return 0;
1578 }
1579
1580 int is_gpr(BITS sel)
1581 {
1582 if (sel >= 0 && sel < 128)
1583 {
1584 return 1;
1585 }
1586 return 0;
1587 }
1588
1589 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1590 SQ_ALU_VEC_120, //001
1591 SQ_ALU_VEC_102, //010
1592
1593 SQ_ALU_VEC_201, //011
1594 SQ_ALU_VEC_012, //100
1595 SQ_ALU_VEC_021, //101
1596
1597 SQ_ALU_VEC_012, //110
1598 SQ_ALU_VEC_012}; //111
1599
1600 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1601 SQ_ALU_SCL_122, //001
1602 SQ_ALU_SCL_122, //010
1603
1604 SQ_ALU_SCL_221, //011
1605 SQ_ALU_SCL_212, //100
1606 SQ_ALU_SCL_122, //101
1607
1608 SQ_ALU_SCL_122, //110
1609 SQ_ALU_SCL_122}; //111
1610
1611 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1612 GLuint sel,
1613 GLuint chan)
1614 {
1615 int res_match = (-1);
1616 int res_empty = (-1);
1617
1618 GLint res;
1619
1620 for (res=3; res>=0; res--)
1621 {
1622 if(pAsm->hw_cfile_addr[ res] < 0)
1623 {
1624 res_empty = res;
1625 }
1626 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1627 &&
1628 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1629 {
1630 res_match = res;
1631 }
1632 }
1633
1634 if(res_match >= 0)
1635 {
1636 // Read for this scalar component already reserved, nothing to do here.
1637 ;
1638 }
1639 else if(res_empty >= 0)
1640 {
1641 pAsm->hw_cfile_addr[ res_empty ] = sel;
1642 pAsm->hw_cfile_chan[ res_empty ] = chan;
1643 }
1644 else
1645 {
1646 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1647 return GL_FALSE;
1648 }
1649 return GL_TRUE;
1650 }
1651
1652 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1653 {
1654 if(pAsm->hw_gpr[cycle][chan] < 0)
1655 {
1656 pAsm->hw_gpr[cycle][chan] = sel;
1657 }
1658 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1659 {
1660 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1661 return GL_FALSE;
1662 }
1663
1664 return GL_TRUE;
1665 }
1666
1667 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1668 {
1669 switch (swiz)
1670 {
1671 case SQ_ALU_SCL_210:
1672 {
1673 int table[3] = {2, 1, 0};
1674 *pCycle = table[sel];
1675 return GL_TRUE;
1676 }
1677 break;
1678 case SQ_ALU_SCL_122:
1679 {
1680 int table[3] = {1, 2, 2};
1681 *pCycle = table[sel];
1682 return GL_TRUE;
1683 }
1684 break;
1685 case SQ_ALU_SCL_212:
1686 {
1687 int table[3] = {2, 1, 2};
1688 *pCycle = table[sel];
1689 return GL_TRUE;
1690 }
1691 break;
1692 case SQ_ALU_SCL_221:
1693 {
1694 int table[3] = {2, 2, 1};
1695 *pCycle = table[sel];
1696 return GL_TRUE;
1697 }
1698 break;
1699 default:
1700 radeon_error("Bad Scalar bank swizzle value\n");
1701 break;
1702 }
1703
1704 return GL_FALSE;
1705 }
1706
1707 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1708 {
1709 switch (swiz)
1710 {
1711 case SQ_ALU_VEC_012:
1712 {
1713 int table[3] = {0, 1, 2};
1714 *pCycle = table[sel];
1715 }
1716 break;
1717 case SQ_ALU_VEC_021:
1718 {
1719 int table[3] = {0, 2, 1};
1720 *pCycle = table[sel];
1721 }
1722 break;
1723 case SQ_ALU_VEC_120:
1724 {
1725 int table[3] = {1, 2, 0};
1726 *pCycle = table[sel];
1727 }
1728 break;
1729 case SQ_ALU_VEC_102:
1730 {
1731 int table[3] = {1, 0, 2};
1732 *pCycle = table[sel];
1733 }
1734 break;
1735 case SQ_ALU_VEC_201:
1736 {
1737 int table[3] = {2, 0, 1};
1738 *pCycle = table[sel];
1739 }
1740 break;
1741 case SQ_ALU_VEC_210:
1742 {
1743 int table[3] = {2, 1, 0};
1744 *pCycle = table[sel];
1745 }
1746 break;
1747 default:
1748 radeon_error("Bad Vec bank swizzle value\n");
1749 return GL_FALSE;
1750 break;
1751 }
1752
1753 return GL_TRUE;
1754 }
1755
1756 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1757 R700ALUInstruction* alu_instruction_ptr)
1758 {
1759 GLuint cycle;
1760 GLuint bank_swizzle;
1761 GLuint const_count = 0;
1762
1763 BITS sel;
1764 BITS chan;
1765 BITS rel;
1766 BITS neg;
1767
1768 GLuint src;
1769
1770 BITS src_sel [3] = {0,0,0};
1771 BITS src_chan[3] = {0,0,0};
1772 BITS src_rel [3] = {0,0,0};
1773 BITS src_neg [3] = {0,0,0};
1774
1775 GLuint swizzle_key;
1776
1777 GLuint number_of_operands = r700GetNumOperands(pAsm);
1778
1779 for (src=0; src<number_of_operands; src++)
1780 {
1781 get_src_properties(alu_instruction_ptr,
1782 src,
1783 &(src_sel[src]),
1784 &(src_rel[src]),
1785 &(src_chan[src]),
1786 &(src_neg[src]) );
1787 }
1788
1789
1790 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1791 (is_const( src_sel[1] ) ? 2 : 0) +
1792 (is_const( src_sel[2] ) ? 1 : 0) );
1793
1794 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
1795
1796 for (src=0; src<number_of_operands; src++)
1797 {
1798 sel = src_sel [src];
1799 chan = src_chan[src];
1800 rel = src_rel [src];
1801 neg = src_neg [src];
1802
1803 if (is_const( sel ))
1804 {
1805 // Any constant, including literal and inline constants
1806 const_count++;
1807
1808 if (is_cfile( sel ))
1809 {
1810 reserve_cfile(pAsm, sel, chan);
1811 }
1812
1813 }
1814 }
1815
1816 for (src=0; src<number_of_operands; src++)
1817 {
1818 sel = src_sel [src];
1819 chan = src_chan[src];
1820 rel = src_rel [src];
1821 neg = src_neg [src];
1822
1823 if( is_gpr(sel) )
1824 {
1825 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1826
1827 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
1828 {
1829 return GL_FALSE;
1830 }
1831
1832 if(cycle < const_count)
1833 {
1834 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1835 {
1836 return GL_FALSE;
1837 }
1838 }
1839 }
1840 }
1841
1842 return GL_TRUE;
1843 }
1844
1845 GLboolean check_vector(r700_AssemblerBase* pAsm,
1846 R700ALUInstruction* alu_instruction_ptr)
1847 {
1848 GLuint cycle;
1849 GLuint bank_swizzle;
1850 GLuint const_count = 0;
1851
1852 GLuint src;
1853
1854 BITS sel;
1855 BITS chan;
1856 BITS rel;
1857 BITS neg;
1858
1859 BITS src_sel [3] = {0,0,0};
1860 BITS src_chan[3] = {0,0,0};
1861 BITS src_rel [3] = {0,0,0};
1862 BITS src_neg [3] = {0,0,0};
1863
1864 GLuint swizzle_key;
1865
1866 GLuint number_of_operands = r700GetNumOperands(pAsm);
1867
1868 for (src=0; src<number_of_operands; src++)
1869 {
1870 get_src_properties(alu_instruction_ptr,
1871 src,
1872 &(src_sel[src]),
1873 &(src_rel[src]),
1874 &(src_chan[src]),
1875 &(src_neg[src]) );
1876 }
1877
1878
1879 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1880 (is_const( src_sel[1] ) ? 2 : 0) +
1881 (is_const( src_sel[2] ) ? 1 : 0)
1882 );
1883
1884 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
1885
1886 for (src=0; src<number_of_operands; src++)
1887 {
1888 sel = src_sel [src];
1889 chan = src_chan[src];
1890 rel = src_rel [src];
1891 neg = src_neg [src];
1892
1893
1894 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1895
1896 if( is_gpr(sel) )
1897 {
1898 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
1899 {
1900 return GL_FALSE;
1901 }
1902
1903 if ( (src == 1) &&
1904 (sel == src_sel[0]) &&
1905 (chan == src_chan[0]) )
1906 {
1907 }
1908 else
1909 {
1910 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1911 {
1912 return GL_FALSE;
1913 }
1914 }
1915 }
1916 else if( is_const(sel) )
1917 {
1918 const_count++;
1919
1920 if( is_cfile(sel) )
1921 {
1922 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
1923 {
1924 return GL_FALSE;
1925 }
1926 }
1927 }
1928 }
1929
1930 return GL_TRUE;
1931 }
1932
1933 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
1934 {
1935 GLuint number_of_scalar_operations;
1936 GLboolean is_single_scalar_operation;
1937 GLuint scalar_channel_index;
1938
1939 PVSSRC * pcurrent_source;
1940 int current_source_index;
1941 GLuint contiguous_slots_needed;
1942
1943 GLuint uNumSrc = r700GetNumOperands(pAsm);
1944 GLuint channel_swizzle, j;
1945 GLuint chan_counter[4] = {0, 0, 0, 0};
1946 PVSSRC * pSource[3];
1947 GLboolean bSplitInst = GL_FALSE;
1948
1949 if (1 == pAsm->D.dst.math)
1950 {
1951 is_single_scalar_operation = GL_TRUE;
1952 number_of_scalar_operations = 1;
1953 }
1954 else
1955 {
1956 is_single_scalar_operation = GL_FALSE;
1957 number_of_scalar_operations = 4;
1958
1959 /* check read port, only very preliminary algorithm, not count in
1960 src0/1 same comp case and prev slot repeat case; also not count relative
1961 addressing. TODO: improve performance. */
1962 for(j=0; j<uNumSrc; j++)
1963 {
1964 pSource[j] = &(pAsm->S[j].src);
1965 }
1966 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
1967 {
1968 for(j=0; j<uNumSrc; j++)
1969 {
1970 switch (scalar_channel_index)
1971 {
1972 case 0: channel_swizzle = pSource[j]->swizzlex; break;
1973 case 1: channel_swizzle = pSource[j]->swizzley; break;
1974 case 2: channel_swizzle = pSource[j]->swizzlez; break;
1975 case 3: channel_swizzle = pSource[j]->swizzlew; break;
1976 default: channel_swizzle = SQ_SEL_MASK; break;
1977 }
1978 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
1979 (pSource[j]->rtype == SRC_REG_INPUT))
1980 && (channel_swizzle <= SQ_SEL_W) )
1981 {
1982 chan_counter[channel_swizzle]++;
1983 }
1984 }
1985 }
1986 if( (chan_counter[SQ_SEL_X] > 3)
1987 || (chan_counter[SQ_SEL_Y] > 3)
1988 || (chan_counter[SQ_SEL_Z] > 3)
1989 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
1990 {
1991 bSplitInst = GL_TRUE;
1992 }
1993 }
1994
1995 contiguous_slots_needed = 0;
1996
1997 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
1998 {
1999 contiguous_slots_needed = 4;
2000 }
2001
2002 initialize(pAsm);
2003
2004 for (scalar_channel_index=0;
2005 scalar_channel_index < number_of_scalar_operations;
2006 scalar_channel_index++)
2007 {
2008 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2009 if (alu_instruction_ptr == NULL)
2010 {
2011 return GL_FALSE;
2012 }
2013 Init_R700ALUInstruction(alu_instruction_ptr);
2014
2015 //src 0
2016 current_source_index = 0;
2017 pcurrent_source = &(pAsm->S[0].src);
2018
2019 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2020 current_source_index,
2021 pcurrent_source,
2022 scalar_channel_index) )
2023 {
2024 return GL_FALSE;
2025 }
2026
2027 if (pAsm->D.dst.math == 0)
2028 {
2029 // Process source 1
2030 current_source_index = 1;
2031 pcurrent_source = &(pAsm->S[current_source_index].src);
2032
2033 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2034 current_source_index,
2035 pcurrent_source,
2036 scalar_channel_index) )
2037 {
2038 return GL_FALSE;
2039 }
2040 }
2041
2042 //other bits
2043 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2044
2045 if( (is_single_scalar_operation == GL_TRUE)
2046 || (GL_TRUE == bSplitInst) )
2047 {
2048 alu_instruction_ptr->m_Word0.f.last = 1;
2049 }
2050 else
2051 {
2052 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2053 }
2054
2055 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2056 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2057 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2058
2059 // dst
2060 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2061 (pAsm->D.dst.rtype == DST_REG_OUT) )
2062 {
2063 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2064 }
2065 else
2066 {
2067 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2068 return GL_FALSE;
2069 }
2070
2071 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2072
2073 if ( is_single_scalar_operation == GL_TRUE )
2074 {
2075 // Override scalar_channel_index since only one scalar value will be written
2076 if(pAsm->D.dst.writex)
2077 {
2078 scalar_channel_index = 0;
2079 }
2080 else if(pAsm->D.dst.writey)
2081 {
2082 scalar_channel_index = 1;
2083 }
2084 else if(pAsm->D.dst.writez)
2085 {
2086 scalar_channel_index = 2;
2087 }
2088 else if(pAsm->D.dst.writew)
2089 {
2090 scalar_channel_index = 3;
2091 }
2092 }
2093
2094 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2095
2096 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2097
2098 if (pAsm->D.dst.op3)
2099 {
2100 //op3
2101
2102 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2103
2104 //There's 3rd src for op3
2105 current_source_index = 2;
2106 pcurrent_source = &(pAsm->S[current_source_index].src);
2107
2108 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2109 current_source_index,
2110 pcurrent_source,
2111 scalar_channel_index) )
2112 {
2113 return GL_FALSE;
2114 }
2115 }
2116 else
2117 {
2118 //op2
2119 if (pAsm->bR6xx)
2120 {
2121 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2122
2123 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2124 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2125
2126 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2127 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2128 switch (scalar_channel_index)
2129 {
2130 case 0:
2131 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2132 break;
2133 case 1:
2134 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2135 break;
2136 case 2:
2137 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2138 break;
2139 case 3:
2140 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2141 break;
2142 default:
2143 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2144 break;
2145 }
2146 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2147 }
2148 else
2149 {
2150 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2151
2152 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2153 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2154
2155 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2156 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2157 switch (scalar_channel_index)
2158 {
2159 case 0:
2160 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2161 break;
2162 case 1:
2163 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2164 break;
2165 case 2:
2166 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2167 break;
2168 case 3:
2169 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2170 break;
2171 default:
2172 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2173 break;
2174 }
2175 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2176 }
2177 }
2178
2179 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2180 {
2181 return GL_FALSE;
2182 }
2183
2184 /*
2185 * Judge the type of current instruction, is it vector or scalar
2186 * instruction.
2187 */
2188 if (is_single_scalar_operation)
2189 {
2190 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2191 {
2192 return GL_FALSE;
2193 }
2194 }
2195 else
2196 {
2197 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2198 {
2199 return 1;
2200 }
2201 }
2202
2203 contiguous_slots_needed = 0;
2204 }
2205
2206 return GL_TRUE;
2207 }
2208
2209 GLboolean next_ins(r700_AssemblerBase *pAsm)
2210 {
2211 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2212
2213 if( GL_TRUE == IsTex(pILInst->Opcode) &&
2214 /* handle const moves to temp register */
2215 !(pAsm->D.dst.opcode == SQ_OP2_INST_MOV) )
2216 {
2217 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2218 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2219 {
2220 radeon_error("Error assembling TEX instruction\n");
2221 return GL_FALSE;
2222 }
2223 } else {
2224 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2225 {
2226 radeon_error("Error assembling TEX instruction\n");
2227 return GL_FALSE;
2228 }
2229 }
2230 }
2231 else
2232 { //ALU
2233 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2234 {
2235 radeon_error("Error assembling ALU instruction\n");
2236 return GL_FALSE;
2237 }
2238 }
2239
2240 if(pAsm->D.dst.rtype == DST_REG_OUT)
2241 {
2242 if(pAsm->D.dst.op3)
2243 {
2244 // There is no mask for OP3 instructions, so all channels are written
2245 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2246 }
2247 else
2248 {
2249 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2250 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2251 }
2252 }
2253
2254 //reset for next inst.
2255 pAsm->D.bits = 0;
2256 pAsm->S[0].bits = 0;
2257 pAsm->S[1].bits = 0;
2258 pAsm->S[2].bits = 0;
2259
2260 return GL_TRUE;
2261 }
2262
2263 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2264 {
2265 BITS tmp;
2266
2267 checkop1(pAsm);
2268
2269 tmp = gethelpr(pAsm);
2270
2271 // opcode tmp.x, a.x
2272 // MOV dst, tmp.x
2273
2274 pAsm->D.dst.opcode = opcode;
2275 pAsm->D.dst.math = 1;
2276
2277 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2278 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2279 pAsm->D.dst.reg = tmp;
2280 pAsm->D.dst.writex = 1;
2281
2282 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2283 {
2284 return GL_FALSE;
2285 }
2286
2287 if ( GL_FALSE == next_ins(pAsm) )
2288 {
2289 return GL_FALSE;
2290 }
2291
2292 // Now replicate result to all necessary channels in destination
2293 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2294
2295 if( GL_FALSE == assemble_dst(pAsm) )
2296 {
2297 return GL_FALSE;
2298 }
2299
2300 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2301 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2302 pAsm->S[0].src.reg = tmp;
2303
2304 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2305 noneg_PVSSRC(&(pAsm->S[0].src));
2306
2307 if( GL_FALSE == next_ins(pAsm) )
2308 {
2309 return GL_FALSE;
2310 }
2311
2312 return GL_TRUE;
2313 }
2314
2315 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2316 {
2317 checkop1(pAsm);
2318
2319 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2320
2321 if( GL_FALSE == assemble_dst(pAsm) )
2322 {
2323 return GL_FALSE;
2324 }
2325 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2326 {
2327 return GL_FALSE;
2328 }
2329
2330 pAsm->S[1].bits = pAsm->S[0].bits;
2331 flipneg_PVSSRC(&(pAsm->S[1].src));
2332
2333 if ( GL_FALSE == next_ins(pAsm) )
2334 {
2335 return GL_FALSE;
2336 }
2337
2338 return GL_TRUE;
2339 }
2340
2341 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2342 {
2343 if( GL_FALSE == checkop2(pAsm) )
2344 {
2345 return GL_FALSE;
2346 }
2347
2348 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2349
2350 if( GL_FALSE == assemble_dst(pAsm) )
2351 {
2352 return GL_FALSE;
2353 }
2354
2355 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2356 {
2357 return GL_FALSE;
2358 }
2359
2360 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2361 {
2362 return GL_FALSE;
2363 }
2364
2365 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2366 {
2367 flipneg_PVSSRC(&(pAsm->S[1].src));
2368 }
2369
2370 if( GL_FALSE == next_ins(pAsm) )
2371 {
2372 return GL_FALSE;
2373 }
2374
2375 return GL_TRUE;
2376 }
2377
2378 GLboolean assemble_BAD(char *opcode_str)
2379 {
2380 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2381 return GL_FALSE;
2382 }
2383
2384 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2385 {
2386 int tmp;
2387
2388 if( GL_FALSE == checkop3(pAsm) )
2389 {
2390 return GL_FALSE;
2391 }
2392
2393 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2394 pAsm->D.dst.op3 = 1;
2395
2396 tmp = (-1);
2397
2398 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2399 {
2400 //OP3 has no support for write mask
2401 tmp = gethelpr(pAsm);
2402
2403 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2404 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2405 pAsm->D.dst.reg = tmp;
2406
2407 nomask_PVSDST(&(pAsm->D.dst));
2408 }
2409 else
2410 {
2411 if( GL_FALSE == assemble_dst(pAsm) )
2412 {
2413 return GL_FALSE;
2414 }
2415 }
2416
2417 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2418 {
2419 return GL_FALSE;
2420 }
2421
2422 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2423 {
2424 return GL_FALSE;
2425 }
2426
2427 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2428 {
2429 return GL_FALSE;
2430 }
2431
2432 if ( GL_FALSE == next_ins(pAsm) )
2433 {
2434 return GL_FALSE;
2435 }
2436
2437 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2438 {
2439 if( GL_FALSE == assemble_dst(pAsm) )
2440 {
2441 return GL_FALSE;
2442 }
2443
2444 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2445
2446 //tmp for source
2447 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2448 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2449 pAsm->S[0].src.reg = tmp;
2450
2451 noneg_PVSSRC(&(pAsm->S[0].src));
2452 noswizzle_PVSSRC(&(pAsm->S[0].src));
2453
2454 if( GL_FALSE == next_ins(pAsm) )
2455 {
2456 return GL_FALSE;
2457 }
2458 }
2459
2460 return GL_TRUE;
2461 }
2462
2463 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
2464 {
2465 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
2466 }
2467
2468 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2469 {
2470 if( GL_FALSE == checkop2(pAsm) )
2471 {
2472 return GL_FALSE;
2473 }
2474
2475 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2476
2477 if( GL_FALSE == assemble_dst(pAsm) )
2478 {
2479 return GL_FALSE;
2480 }
2481
2482 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2483 {
2484 return GL_FALSE;
2485 }
2486
2487 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2488 {
2489 return GL_FALSE;
2490 }
2491
2492 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2493 {
2494 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2495 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2496 }
2497 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2498 {
2499 onecomp_PVSSRC(&(pAsm->S[1].src), 3);
2500 }
2501
2502 if ( GL_FALSE == next_ins(pAsm) )
2503 {
2504 return GL_FALSE;
2505 }
2506
2507 return GL_TRUE;
2508 }
2509
2510 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2511 {
2512 if( GL_FALSE == checkop2(pAsm) )
2513 {
2514 return GL_FALSE;
2515 }
2516
2517 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2518
2519 if( GL_FALSE == assemble_dst(pAsm) )
2520 {
2521 return GL_FALSE;
2522 }
2523
2524 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2525 {
2526 return GL_FALSE;
2527 }
2528
2529 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2530 {
2531 return GL_FALSE;
2532 }
2533
2534 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2535 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2536
2537 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2538 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2539
2540 if ( GL_FALSE == next_ins(pAsm) )
2541 {
2542 return GL_FALSE;
2543 }
2544
2545 return GL_TRUE;
2546 }
2547
2548 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2549 {
2550 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2551 }
2552
2553 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
2554 {
2555 checkop1(pAsm);
2556
2557 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2558
2559 if ( GL_FALSE == assemble_dst(pAsm) )
2560 {
2561 return GL_FALSE;
2562 }
2563
2564 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2565 {
2566 return GL_FALSE;
2567 }
2568
2569 if ( GL_FALSE == next_ins(pAsm) )
2570 {
2571 return GL_FALSE;
2572 }
2573
2574 return GL_TRUE;
2575 }
2576
2577 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
2578 {
2579 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
2580 }
2581
2582 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
2583 {
2584 checkop1(pAsm);
2585
2586 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2587
2588 if ( GL_FALSE == assemble_dst(pAsm) )
2589 {
2590 return GL_FALSE;
2591 }
2592
2593 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2594 {
2595 return GL_FALSE;
2596 }
2597
2598 if ( GL_FALSE == next_ins(pAsm) )
2599 {
2600 return GL_FALSE;
2601 }
2602
2603 return GL_TRUE;
2604 }
2605
2606 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
2607 {
2608 checkop1(pAsm);
2609
2610 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
2611
2612 if ( GL_FALSE == assemble_dst(pAsm) )
2613 {
2614 return GL_FALSE;
2615 }
2616
2617 pAsm->D.dst.writex = 0;
2618 pAsm->D.dst.writey = 0;
2619 pAsm->D.dst.writez = 0;
2620 pAsm->D.dst.writew = 0;
2621
2622 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2623 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2624 pAsm->S[0].src.reg = 0;
2625
2626 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
2627 noneg_PVSSRC(&(pAsm->S[0].src));
2628
2629 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2630
2631 if(PROGRAM_TEMPORARY == pAsm->pILInst[pAsm->uiCurInst].DstReg.File)
2632 {
2633 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
2634 }
2635 else
2636 { //PROGRAM_OUTPUT
2637 pAsm->S[1].src.reg = pAsm->uiFP_OutputMap[pAsm->pILInst[pAsm->uiCurInst].DstReg.Index];
2638 }
2639
2640 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2641 noswizzle_PVSSRC(&(pAsm->S[1].src));
2642
2643 if ( GL_FALSE == next_ins(pAsm) )
2644 {
2645 return GL_FALSE;
2646 }
2647
2648 pAsm->pR700Shader->killIsUsed = GL_TRUE;
2649
2650 return GL_TRUE;
2651 }
2652
2653 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
2654 {
2655 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
2656 }
2657
2658 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
2659 {
2660 BITS tmp;
2661
2662 if( GL_FALSE == checkop3(pAsm) )
2663 {
2664 return GL_FALSE;
2665 }
2666
2667 tmp = gethelpr(pAsm);
2668
2669 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2670
2671 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2672 pAsm->D.dst.reg = tmp;
2673 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2674 nomask_PVSDST(&(pAsm->D.dst));
2675
2676
2677 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
2678 {
2679 return GL_FALSE;
2680 }
2681
2682 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
2683 {
2684 return GL_FALSE;
2685 }
2686
2687 neg_PVSSRC(&(pAsm->S[1].src));
2688
2689 if( GL_FALSE == next_ins(pAsm) )
2690 {
2691 return GL_FALSE;
2692 }
2693
2694 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2695 pAsm->D.dst.op3 = 1;
2696
2697 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2698 pAsm->D.dst.reg = tmp;
2699 nomask_PVSDST(&(pAsm->D.dst));
2700 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2701
2702 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2703 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2704 pAsm->S[0].src.reg = tmp;
2705 noswizzle_PVSSRC(&(pAsm->S[0].src));
2706
2707
2708 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
2709 {
2710 return GL_FALSE;
2711 }
2712 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2713 {
2714 return GL_FALSE;
2715 }
2716
2717 if( GL_FALSE == next_ins(pAsm) )
2718 {
2719 return GL_FALSE;
2720 }
2721
2722 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2723
2724 if( GL_FALSE == assemble_dst(pAsm) )
2725 {
2726 return GL_FALSE;
2727 }
2728
2729 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2730 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2731 pAsm->S[0].src.reg = tmp;
2732 noswizzle_PVSSRC(&(pAsm->S[0].src));
2733
2734 if( GL_FALSE == next_ins(pAsm) )
2735 {
2736 return GL_FALSE;
2737 }
2738
2739 return GL_TRUE;
2740 }
2741
2742 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
2743 {
2744 int tmp, ii;
2745 GLboolean bReplaceDst = GL_FALSE;
2746 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2747
2748 if( GL_FALSE == checkop3(pAsm) )
2749 {
2750 return GL_FALSE;
2751 }
2752
2753 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2754 pAsm->D.dst.op3 = 1;
2755
2756 tmp = (-1);
2757
2758 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
2759 { /* TODO : more investigation on MAD src and dst using same register */
2760 for(ii=0; ii<3; ii++)
2761 {
2762 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
2763 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
2764 {
2765 bReplaceDst = GL_TRUE;
2766 break;
2767 }
2768 }
2769 }
2770 if(0xF != pILInst->DstReg.WriteMask)
2771 { /* OP3 has no support for write mask */
2772 bReplaceDst = GL_TRUE;
2773 }
2774
2775 if(GL_TRUE == bReplaceDst)
2776 {
2777 tmp = gethelpr(pAsm);
2778
2779 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2780 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2781 pAsm->D.dst.reg = tmp;
2782
2783 nomask_PVSDST(&(pAsm->D.dst));
2784 }
2785 else
2786 {
2787 if( GL_FALSE == assemble_dst(pAsm) )
2788 {
2789 return GL_FALSE;
2790 }
2791 }
2792
2793 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2794 {
2795 return GL_FALSE;
2796 }
2797
2798 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2799 {
2800 return GL_FALSE;
2801 }
2802
2803 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2804 {
2805 return GL_FALSE;
2806 }
2807
2808 if ( GL_FALSE == next_ins(pAsm) )
2809 {
2810 return GL_FALSE;
2811 }
2812
2813 if (GL_TRUE == bReplaceDst)
2814 {
2815 if( GL_FALSE == assemble_dst(pAsm) )
2816 {
2817 return GL_FALSE;
2818 }
2819
2820 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2821
2822 //tmp for source
2823 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2824 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2825 pAsm->S[0].src.reg = tmp;
2826
2827 noneg_PVSSRC(&(pAsm->S[0].src));
2828 noswizzle_PVSSRC(&(pAsm->S[0].src));
2829
2830 if( GL_FALSE == next_ins(pAsm) )
2831 {
2832 return GL_FALSE;
2833 }
2834 }
2835
2836 return GL_TRUE;
2837 }
2838
2839 /* LIT dst, src */
2840 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
2841 {
2842 unsigned int dstReg;
2843 unsigned int dstType;
2844 unsigned int srcReg;
2845 unsigned int srcType;
2846 checkop1(pAsm);
2847 int tmp = gethelpr(pAsm);
2848
2849 if( GL_FALSE == assemble_dst(pAsm) )
2850 {
2851 return GL_FALSE;
2852 }
2853 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2854 {
2855 return GL_FALSE;
2856 }
2857 dstReg = pAsm->D.dst.reg;
2858 dstType = pAsm->D.dst.rtype;
2859 srcReg = pAsm->S[0].src.reg;
2860 srcType = pAsm->S[0].src.rtype;
2861
2862 /* dst.xw, <- 1.0 */
2863 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2864 pAsm->D.dst.rtype = dstType;
2865 pAsm->D.dst.reg = dstReg;
2866 pAsm->D.dst.writex = 1;
2867 pAsm->D.dst.writey = 0;
2868 pAsm->D.dst.writez = 0;
2869 pAsm->D.dst.writew = 1;
2870 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2871 pAsm->S[0].src.reg = tmp;
2872 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2873 noneg_PVSSRC(&(pAsm->S[0].src));
2874 pAsm->S[0].src.swizzlex = SQ_SEL_1;
2875 pAsm->S[0].src.swizzley = SQ_SEL_1;
2876 pAsm->S[0].src.swizzlez = SQ_SEL_1;
2877 pAsm->S[0].src.swizzlew = SQ_SEL_1;
2878 if( GL_FALSE == next_ins(pAsm) )
2879 {
2880 return GL_FALSE;
2881 }
2882
2883 /* dst.y = max(src.x, 0.0) */
2884 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2885 pAsm->D.dst.rtype = dstType;
2886 pAsm->D.dst.reg = dstReg;
2887 pAsm->D.dst.writex = 0;
2888 pAsm->D.dst.writey = 1;
2889 pAsm->D.dst.writez = 0;
2890 pAsm->D.dst.writew = 0;
2891 pAsm->S[0].src.rtype = srcType;
2892 pAsm->S[0].src.reg = srcReg;
2893 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2894 noneg_PVSSRC(&(pAsm->S[0].src));
2895 pAsm->S[0].src.swizzlex = SQ_SEL_X;
2896 pAsm->S[0].src.swizzley = SQ_SEL_X;
2897 pAsm->S[0].src.swizzlez = SQ_SEL_X;
2898 pAsm->S[0].src.swizzlew = SQ_SEL_X;
2899 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2900 pAsm->S[1].src.reg = tmp;
2901 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2902 noneg_PVSSRC(&(pAsm->S[1].src));
2903 pAsm->S[1].src.swizzlex = SQ_SEL_0;
2904 pAsm->S[1].src.swizzley = SQ_SEL_0;
2905 pAsm->S[1].src.swizzlez = SQ_SEL_0;
2906 pAsm->S[1].src.swizzlew = SQ_SEL_0;
2907 if( GL_FALSE == next_ins(pAsm) )
2908 {
2909 return GL_FALSE;
2910 }
2911
2912 /* before: dst.w = log(src.y)
2913 * after : dst.x = log(src.y)
2914 * why change dest register is that dst.w has been initialized as 1 before
2915 */
2916 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
2917 pAsm->D.dst.math = 1;
2918 pAsm->D.dst.rtype = dstType;
2919 pAsm->D.dst.reg = dstReg;
2920 pAsm->D.dst.writex = 1;
2921 pAsm->D.dst.writey = 0;
2922 pAsm->D.dst.writez = 0;
2923 pAsm->D.dst.writew = 0;
2924 pAsm->S[0].src.rtype = srcType;
2925 pAsm->S[0].src.reg = srcReg;
2926 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2927 noneg_PVSSRC(&(pAsm->S[0].src));
2928 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
2929 pAsm->S[0].src.swizzley = SQ_SEL_Y;
2930 pAsm->S[0].src.swizzlez = SQ_SEL_Y;
2931 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
2932 if( GL_FALSE == next_ins(pAsm) )
2933 {
2934 return GL_FALSE;
2935 }
2936
2937 /* before: tmp.x = amd MUL_LIT(src.w, dst.w, src.x ) */
2938 /* after : tmp.x = amd MUL_LIT(src.w, dst.x, src.x ) */
2939 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
2940 pAsm->D.dst.op3 = 1;
2941 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2942 pAsm->D.dst.reg = tmp;
2943 pAsm->D.dst.writex = 1;
2944 pAsm->D.dst.writey = 0;
2945 pAsm->D.dst.writez = 0;
2946 pAsm->D.dst.writew = 0;
2947
2948 pAsm->S[0].src.rtype = srcType;
2949 pAsm->S[0].src.reg = srcReg;
2950 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2951 noneg_PVSSRC(&(pAsm->S[0].src));
2952 pAsm->S[0].src.swizzlex = SQ_SEL_W;
2953 pAsm->S[0].src.swizzley = SQ_SEL_W;
2954 pAsm->S[0].src.swizzlez = SQ_SEL_W;
2955 pAsm->S[0].src.swizzlew = SQ_SEL_W;
2956
2957 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2958 pAsm->S[1].src.reg = dstReg;
2959 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2960 noneg_PVSSRC(&(pAsm->S[1].src));
2961 pAsm->S[1].src.swizzlex = SQ_SEL_X;
2962 pAsm->S[1].src.swizzley = SQ_SEL_X;
2963 pAsm->S[1].src.swizzlez = SQ_SEL_X;
2964 pAsm->S[1].src.swizzlew = SQ_SEL_X;
2965
2966 pAsm->S[2].src.rtype = srcType;
2967 pAsm->S[2].src.reg = srcReg;
2968 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
2969 noneg_PVSSRC(&(pAsm->S[2].src));
2970 pAsm->S[2].src.swizzlex = SQ_SEL_X;
2971 pAsm->S[2].src.swizzley = SQ_SEL_X;
2972 pAsm->S[2].src.swizzlez = SQ_SEL_X;
2973 pAsm->S[2].src.swizzlew = SQ_SEL_X;
2974
2975 if( GL_FALSE == next_ins(pAsm) )
2976 {
2977 return GL_FALSE;
2978 }
2979
2980 /* dst.z = exp(tmp.x) */
2981 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
2982 pAsm->D.dst.math = 1;
2983 pAsm->D.dst.rtype = dstType;
2984 pAsm->D.dst.reg = dstReg;
2985 pAsm->D.dst.writex = 0;
2986 pAsm->D.dst.writey = 0;
2987 pAsm->D.dst.writez = 1;
2988 pAsm->D.dst.writew = 0;
2989
2990 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2991 pAsm->S[0].src.reg = tmp;
2992 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2993 noneg_PVSSRC(&(pAsm->S[0].src));
2994 pAsm->S[0].src.swizzlex = SQ_SEL_X;
2995 pAsm->S[0].src.swizzley = SQ_SEL_X;
2996 pAsm->S[0].src.swizzlez = SQ_SEL_X;
2997 pAsm->S[0].src.swizzlew = SQ_SEL_X;
2998
2999 if( GL_FALSE == next_ins(pAsm) )
3000 {
3001 return GL_FALSE;
3002 }
3003
3004 return GL_TRUE;
3005 }
3006
3007 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3008 {
3009 if( GL_FALSE == checkop2(pAsm) )
3010 {
3011 return GL_FALSE;
3012 }
3013
3014 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3015
3016 if( GL_FALSE == assemble_dst(pAsm) )
3017 {
3018 return GL_FALSE;
3019 }
3020
3021 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3022 {
3023 return GL_FALSE;
3024 }
3025
3026 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3027 {
3028 return GL_FALSE;
3029 }
3030
3031 if( GL_FALSE == next_ins(pAsm) )
3032 {
3033 return GL_FALSE;
3034 }
3035
3036 return GL_TRUE;
3037 }
3038
3039 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3040 {
3041 if( GL_FALSE == checkop2(pAsm) )
3042 {
3043 return GL_FALSE;
3044 }
3045
3046 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3047
3048 if( GL_FALSE == assemble_dst(pAsm) )
3049 {
3050 return GL_FALSE;
3051 }
3052
3053 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3054 {
3055 return GL_FALSE;
3056 }
3057
3058 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3059 {
3060 return GL_FALSE;
3061 }
3062
3063 if( GL_FALSE == next_ins(pAsm) )
3064 {
3065 return GL_FALSE;
3066 }
3067
3068 return GL_TRUE;
3069 }
3070
3071 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3072 {
3073 checkop1(pAsm);
3074
3075 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3076
3077 if (GL_FALSE == assemble_dst(pAsm))
3078 {
3079 return GL_FALSE;
3080 }
3081
3082 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3083 {
3084 return GL_FALSE;
3085 }
3086
3087 if ( GL_FALSE == next_ins(pAsm) )
3088 {
3089 return GL_FALSE;
3090 }
3091
3092 return GL_TRUE;
3093 }
3094
3095 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3096 {
3097 if( GL_FALSE == checkop2(pAsm) )
3098 {
3099 return GL_FALSE;
3100 }
3101
3102 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3103
3104 if( GL_FALSE == assemble_dst(pAsm) )
3105 {
3106 return GL_FALSE;
3107 }
3108
3109 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3110 {
3111 return GL_FALSE;
3112 }
3113
3114 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3115 {
3116 return GL_FALSE;
3117 }
3118
3119 if( GL_FALSE == next_ins(pAsm) )
3120 {
3121 return GL_FALSE;
3122 }
3123
3124 return GL_TRUE;
3125 }
3126
3127 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3128 {
3129 BITS tmp;
3130
3131 checkop1(pAsm);
3132
3133 tmp = gethelpr(pAsm);
3134
3135 // LG2 tmp.x, a.swizzle
3136 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3137 pAsm->D.dst.math = 1;
3138
3139 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3140 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3141 pAsm->D.dst.reg = tmp;
3142 nomask_PVSDST(&(pAsm->D.dst));
3143
3144 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3145 {
3146 return GL_FALSE;
3147 }
3148
3149 if( GL_FALSE == next_ins(pAsm) )
3150 {
3151 return GL_FALSE;
3152 }
3153
3154 // MUL tmp.x, tmp.x, b.swizzle
3155 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3156
3157 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3158 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3159 pAsm->D.dst.reg = tmp;
3160 nomask_PVSDST(&(pAsm->D.dst));
3161
3162 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3163 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3164 pAsm->S[0].src.reg = tmp;
3165 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3166 noneg_PVSSRC(&(pAsm->S[0].src));
3167
3168 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3169 {
3170 return GL_FALSE;
3171 }
3172
3173 if( GL_FALSE == next_ins(pAsm) )
3174 {
3175 return GL_FALSE;
3176 }
3177
3178 // EX2 dst.mask, tmp.x
3179 // EX2 tmp.x, tmp.x
3180 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3181 pAsm->D.dst.math = 1;
3182
3183 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3184 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3185 pAsm->D.dst.reg = tmp;
3186 nomask_PVSDST(&(pAsm->D.dst));
3187
3188 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3189 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3190 pAsm->S[0].src.reg = tmp;
3191 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3192 noneg_PVSSRC(&(pAsm->S[0].src));
3193
3194 if( GL_FALSE == next_ins(pAsm) )
3195 {
3196 return GL_FALSE;
3197 }
3198
3199 // Now replicate result to all necessary channels in destination
3200 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3201
3202 if( GL_FALSE == assemble_dst(pAsm) )
3203 {
3204 return GL_FALSE;
3205 }
3206
3207 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3208 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3209 pAsm->S[0].src.reg = tmp;
3210
3211 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3212 noneg_PVSSRC(&(pAsm->S[0].src));
3213
3214 if( GL_FALSE == next_ins(pAsm) )
3215 {
3216 return GL_FALSE;
3217 }
3218
3219 return GL_TRUE;
3220 }
3221
3222 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3223 {
3224 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3225 }
3226
3227 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3228 {
3229 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3230 }
3231
3232 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
3233 {
3234 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
3235 }
3236
3237 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3238 {
3239 BITS tmp;
3240
3241 checkop1(pAsm);
3242
3243 tmp = gethelpr(pAsm);
3244
3245 // COS tmp.x, a.x
3246 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
3247 pAsm->D.dst.math = 1;
3248
3249 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3250 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3251 pAsm->D.dst.reg = tmp;
3252 pAsm->D.dst.writex = 1;
3253
3254 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3255 {
3256 return GL_FALSE;
3257 }
3258
3259 if ( GL_FALSE == next_ins(pAsm) )
3260 {
3261 return GL_FALSE;
3262 }
3263
3264 // SIN tmp.y, a.x
3265 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
3266 pAsm->D.dst.math = 1;
3267
3268 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3269 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3270 pAsm->D.dst.reg = tmp;
3271 pAsm->D.dst.writey = 1;
3272
3273 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3274 {
3275 return GL_FALSE;
3276 }
3277
3278 if( GL_FALSE == next_ins(pAsm) )
3279 {
3280 return GL_FALSE;
3281 }
3282
3283 // MOV dst.mask, tmp
3284 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3285
3286 if( GL_FALSE == assemble_dst(pAsm) )
3287 {
3288 return GL_FALSE;
3289 }
3290
3291 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3292 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3293 pAsm->S[0].src.reg = tmp;
3294
3295 noswizzle_PVSSRC(&(pAsm->S[0].src));
3296 pAsm->S[0].src.swizzlez = SQ_SEL_0;
3297 pAsm->S[0].src.swizzlew = SQ_SEL_0;
3298
3299 if ( GL_FALSE == next_ins(pAsm) )
3300 {
3301 return GL_FALSE;
3302 }
3303
3304 return GL_TRUE;
3305 }
3306
3307 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
3308 {
3309 if( GL_FALSE == checkop2(pAsm) )
3310 {
3311 return GL_FALSE;
3312 }
3313
3314 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
3315
3316 if( GL_FALSE == assemble_dst(pAsm) )
3317 {
3318 return GL_FALSE;
3319 }
3320
3321 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3322 {
3323 return GL_FALSE;
3324 }
3325
3326 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3327 {
3328 return GL_FALSE;
3329 }
3330
3331 if( GL_FALSE == next_ins(pAsm) )
3332 {
3333 return GL_FALSE;
3334 }
3335
3336 return GL_TRUE;
3337 }
3338
3339 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
3340 {
3341 if( GL_FALSE == checkop2(pAsm) )
3342 {
3343 return GL_FALSE;
3344 }
3345
3346 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
3347
3348 if( GL_FALSE == assemble_dst(pAsm) )
3349 {
3350 return GL_FALSE;
3351 }
3352
3353 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3354 {
3355 return GL_FALSE;
3356 }
3357
3358 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3359 {
3360 return GL_FALSE;
3361 }
3362
3363 if( GL_FALSE == next_ins(pAsm) )
3364 {
3365 return GL_FALSE;
3366 }
3367
3368 return GL_TRUE;
3369 }
3370
3371 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
3372 {
3373 return GL_TRUE;
3374 }
3375
3376 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
3377 {
3378 GLboolean src_const;
3379
3380 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
3381 {
3382 case PROGRAM_CONSTANT:
3383 case PROGRAM_LOCAL_PARAM:
3384 case PROGRAM_ENV_PARAM:
3385 case PROGRAM_STATE_VAR:
3386 src_const = GL_TRUE;
3387 break;
3388 case PROGRAM_TEMPORARY:
3389 case PROGRAM_INPUT:
3390 default:
3391 src_const = GL_FALSE;
3392 break;
3393 }
3394
3395 if (GL_TRUE == src_const)
3396 {
3397 if ( GL_FALSE == mov_temp(pAsm, 0) )
3398 return GL_FALSE;
3399 }
3400
3401 switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
3402 {
3403 case OPCODE_TEX:
3404 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
3405 break;
3406 case OPCODE_TXB:
3407 radeon_error("do not support TXB yet\n");
3408 return GL_FALSE;
3409 break;
3410 case OPCODE_TXP:
3411 /* TODO : tex proj version : divid first 3 components by 4th */
3412 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
3413 break;
3414 default:
3415 radeon_error("Internal error: bad texture op (not TEX)\n");
3416 return GL_FALSE;
3417 break;
3418 }
3419
3420 // Set src1 to tex unit id
3421 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
3422 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3423
3424 //No sw info from mesa compiler, so hard code here.
3425 pAsm->S[1].src.swizzlex = SQ_SEL_X;
3426 pAsm->S[1].src.swizzley = SQ_SEL_Y;
3427 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3428 pAsm->S[1].src.swizzlew = SQ_SEL_W;
3429
3430 if( GL_FALSE == tex_dst(pAsm) )
3431 {
3432 return GL_FALSE;
3433 }
3434
3435 if( GL_FALSE == tex_src(pAsm) )
3436 {
3437 return GL_FALSE;
3438 }
3439
3440 if ( GL_FALSE == next_ins(pAsm) )
3441 {
3442 return GL_FALSE;
3443 }
3444
3445 return GL_TRUE;
3446 }
3447
3448 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
3449 {
3450 BITS tmp;
3451
3452 if( GL_FALSE == checkop2(pAsm) )
3453 {
3454 return GL_FALSE;
3455 }
3456
3457 tmp = gethelpr(pAsm);
3458
3459 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3460
3461 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3462 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3463 pAsm->D.dst.reg = tmp;
3464 nomask_PVSDST(&(pAsm->D.dst));
3465
3466 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3467 {
3468 return GL_FALSE;
3469 }
3470
3471 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3472 {
3473 return GL_FALSE;
3474 }
3475
3476 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3477 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3478
3479 if( GL_FALSE == next_ins(pAsm) )
3480 {
3481 return GL_FALSE;
3482 }
3483
3484 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3485 pAsm->D.dst.op3 = 1;
3486
3487 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3488 {
3489 tmp = gethelpr(pAsm);
3490
3491 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3492 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3493 pAsm->D.dst.reg = tmp;
3494
3495 nomask_PVSDST(&(pAsm->D.dst));
3496 }
3497 else
3498 {
3499 if( GL_FALSE == assemble_dst(pAsm) )
3500 {
3501 return GL_FALSE;
3502 }
3503 }
3504
3505 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3506 {
3507 return GL_FALSE;
3508 }
3509
3510 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3511 {
3512 return GL_FALSE;
3513 }
3514
3515 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3516 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3517
3518 // result1 + (neg) result0
3519 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
3520 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
3521 pAsm->S[2].src.reg = tmp;
3522
3523 neg_PVSSRC(&(pAsm->S[2].src));
3524 noswizzle_PVSSRC(&(pAsm->S[2].src));
3525
3526 if( GL_FALSE == next_ins(pAsm) )
3527 {
3528 return GL_FALSE;
3529 }
3530
3531
3532 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3533 {
3534 if( GL_FALSE == assemble_dst(pAsm) )
3535 {
3536 return GL_FALSE;
3537 }
3538
3539 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3540
3541 // Use tmp as source
3542 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3543 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3544 pAsm->S[0].src.reg = tmp;
3545
3546 noneg_PVSSRC(&(pAsm->S[0].src));
3547 noswizzle_PVSSRC(&(pAsm->S[0].src));
3548
3549 if( GL_FALSE == next_ins(pAsm) )
3550 {
3551 return GL_FALSE;
3552 }
3553 }
3554
3555 return GL_TRUE;
3556 }
3557
3558 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
3559 {
3560 return GL_TRUE;
3561 }
3562
3563 GLboolean assemble_IF(r700_AssemblerBase *pAsm)
3564 {
3565 return GL_TRUE;
3566 }
3567
3568 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
3569 {
3570 return GL_TRUE;
3571 }
3572
3573 GLboolean AssembleInstr(GLuint uiNumberInsts,
3574 struct prog_instruction *pILInst,
3575 r700_AssemblerBase *pR700AsmCode)
3576 {
3577 GLuint i;
3578
3579 pR700AsmCode->pILInst = pILInst;
3580 for(i=0; i<uiNumberInsts; i++)
3581 {
3582 pR700AsmCode->uiCurInst = i;
3583
3584 switch (pILInst[i].Opcode)
3585 {
3586 case OPCODE_ABS:
3587 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
3588 return GL_FALSE;
3589 break;
3590 case OPCODE_ADD:
3591 case OPCODE_SUB:
3592 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
3593 return GL_FALSE;
3594 break;
3595
3596 case OPCODE_ARL:
3597 radeon_error("Not yet implemented instruction OPCODE_ARL \n");
3598 //if ( GL_FALSE == assemble_BAD("ARL") )
3599 return GL_FALSE;
3600 break;
3601 case OPCODE_ARR:
3602 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
3603 //if ( GL_FALSE == assemble_BAD("ARR") )
3604 return GL_FALSE;
3605 break;
3606
3607 case OPCODE_CMP:
3608 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
3609 return GL_FALSE;
3610 break;
3611 case OPCODE_COS:
3612 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
3613 return GL_FALSE;
3614 break;
3615
3616 case OPCODE_DP3:
3617 case OPCODE_DP4:
3618 case OPCODE_DPH:
3619 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
3620 return GL_FALSE;
3621 break;
3622
3623 case OPCODE_DST:
3624 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
3625 return GL_FALSE;
3626 break;
3627
3628 case OPCODE_EX2:
3629 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
3630 return GL_FALSE;
3631 break;
3632 case OPCODE_EXP:
3633 radeon_error("Not yet implemented instruction OPCODE_EXP \n");
3634 //if ( GL_FALSE == assemble_BAD("EXP") )
3635 return GL_FALSE;
3636 break; // approx of EX2
3637
3638 case OPCODE_FLR:
3639 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
3640 return GL_FALSE;
3641 break;
3642 //case OP_FLR_INT:
3643 // if ( GL_FALSE == assemble_FLR_INT() )
3644 // return GL_FALSE;
3645 // break;
3646
3647 case OPCODE_FRC:
3648 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
3649 return GL_FALSE;
3650 break;
3651
3652 case OPCODE_KIL:
3653 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
3654 return GL_FALSE;
3655 break;
3656 case OPCODE_LG2:
3657 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
3658 return GL_FALSE;
3659 break;
3660 case OPCODE_LIT:
3661 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
3662 return GL_FALSE;
3663 break;
3664 case OPCODE_LRP:
3665 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
3666 return GL_FALSE;
3667 break;
3668 case OPCODE_LOG:
3669 radeon_error("Not yet implemented instruction OPCODE_LOG \n");
3670 //if ( GL_FALSE == assemble_BAD("LOG") )
3671 return GL_FALSE;
3672 break; // approx of LG2
3673
3674 case OPCODE_MAD:
3675 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
3676 return GL_FALSE;
3677 break;
3678 case OPCODE_MAX:
3679 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
3680 return GL_FALSE;
3681 break;
3682 case OPCODE_MIN:
3683 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
3684 return GL_FALSE;
3685 break;
3686
3687 case OPCODE_MOV:
3688 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3689 return GL_FALSE;
3690 break;
3691 case OPCODE_MUL:
3692 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
3693 return GL_FALSE;
3694 break;
3695
3696 case OPCODE_POW:
3697 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
3698 return GL_FALSE;
3699 break;
3700 case OPCODE_RCP:
3701 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
3702 return GL_FALSE;
3703 break;
3704 case OPCODE_RSQ:
3705 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
3706 return GL_FALSE;
3707 break;
3708 case OPCODE_SIN:
3709 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
3710 return GL_FALSE;
3711 break;
3712 case OPCODE_SCS:
3713 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
3714 return GL_FALSE;
3715 break;
3716
3717 case OPCODE_SGE:
3718 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
3719 return GL_FALSE;
3720 break;
3721 case OPCODE_SLT:
3722 if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
3723 return GL_FALSE;
3724 break;
3725
3726 //case OP_STP:
3727 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
3728 // return GL_FALSE;
3729 // break;
3730
3731 case OPCODE_SWZ:
3732 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3733 {
3734 return GL_FALSE;
3735 }
3736 else
3737 {
3738 if( (i+1)<uiNumberInsts )
3739 {
3740 if(OPCODE_END != pILInst[i+1].Opcode)
3741 {
3742 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
3743 {
3744 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
3745 }
3746 }
3747 }
3748 }
3749 break;
3750
3751 case OPCODE_TEX:
3752 case OPCODE_TXB:
3753 case OPCODE_TXP:
3754 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
3755 return GL_FALSE;
3756 break;
3757
3758 case OPCODE_XPD:
3759 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
3760 return GL_FALSE;
3761 break;
3762
3763 case OPCODE_IF :
3764 if ( GL_FALSE == assemble_IF(pR700AsmCode) )
3765 return GL_FALSE;
3766 break;
3767 case OPCODE_ELSE :
3768 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
3769 //if ( GL_FALSE == assemble_BAD("ELSE") )
3770 return GL_FALSE;
3771 break;
3772 case OPCODE_ENDIF:
3773 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
3774 return GL_FALSE;
3775 break;
3776
3777 //case OPCODE_EXPORT:
3778 // if ( GL_FALSE == assemble_EXPORT() )
3779 // return GL_FALSE;
3780 // break;
3781
3782 case OPCODE_END:
3783 //pR700AsmCode->uiCurInst = i;
3784 //This is to remaind that if in later exoort there is depth/stencil
3785 //export, we need a mov to re-arrange DST channel, where using a
3786 //psuedo inst, we will use this end inst to do it.
3787 return GL_TRUE;
3788
3789 default:
3790 radeon_error("internal: unknown instruction\n");
3791 return GL_FALSE;
3792 }
3793 }
3794
3795 return GL_TRUE;
3796 }
3797
3798 GLboolean Process_Export(r700_AssemblerBase* pAsm,
3799 GLuint type,
3800 GLuint export_starting_index,
3801 GLuint export_count,
3802 GLuint starting_register_number,
3803 GLboolean is_depth_export)
3804 {
3805 unsigned char ucWriteMask;
3806
3807 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
3808 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
3809
3810 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
3811
3812 switch (type)
3813 {
3814 case SQ_EXPORT_PIXEL:
3815 if(GL_TRUE == is_depth_export)
3816 {
3817 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
3818 }
3819 else
3820 {
3821 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
3822 }
3823 break;
3824
3825 case SQ_EXPORT_POS:
3826 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
3827 break;
3828
3829 case SQ_EXPORT_PARAM:
3830 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
3831 break;
3832
3833 default:
3834 radeon_error("Unknown export type: %d\n", type);
3835 return GL_FALSE;
3836 break;
3837 }
3838
3839 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
3840
3841 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
3842 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
3843 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
3844
3845 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
3846 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
3847 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
3848 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
3849 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
3850 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
3851
3852 if (export_count == 1)
3853 {
3854 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
3855 /* exports Z as a float into Red channel */
3856 if (GL_TRUE == is_depth_export)
3857 ucWriteMask = 0x1;
3858
3859 if( (ucWriteMask & 0x1) != 0)
3860 {
3861 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
3862 }
3863 else
3864 {
3865 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
3866 }
3867 if( ((ucWriteMask>>1) & 0x1) != 0)
3868 {
3869 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
3870 }
3871 else
3872 {
3873 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
3874 }
3875 if( ((ucWriteMask>>2) & 0x1) != 0)
3876 {
3877 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
3878 }
3879 else
3880 {
3881 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
3882 }
3883 if( ((ucWriteMask>>3) & 0x1) != 0)
3884 {
3885 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
3886 }
3887 else
3888 {
3889 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
3890 }
3891 }
3892 else
3893 {
3894 // This should only be used if all components for all registers have been written
3895 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
3896 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
3897 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
3898 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
3899 }
3900
3901 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
3902
3903 return GL_TRUE;
3904 }
3905
3906 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
3907 {
3908 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
3909 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
3910
3911 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
3912
3913 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3914
3915 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3916 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3917 pAsm->D.dst.reg = pAsm->depth_export_register_number;
3918
3919 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
3920
3921 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3922 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3923 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
3924
3925 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
3926
3927 noneg_PVSSRC(&(pAsm->S[0].src));
3928
3929 if( GL_FALSE == next_ins(pAsm) )
3930 {
3931 return GL_FALSE;
3932 }
3933
3934 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
3935
3936 return GL_TRUE;
3937 }
3938
3939 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
3940 GLbitfield OutputsWritten)
3941 {
3942 unsigned int unBit;
3943
3944 if(pR700AsmCode->depth_export_register_number >= 0)
3945 {
3946 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
3947 {
3948 return GL_FALSE;
3949 }
3950 }
3951
3952 unBit = 1 << FRAG_RESULT_COLOR;
3953 if(OutputsWritten & unBit)
3954 {
3955 if( GL_FALSE == Process_Export(pR700AsmCode,
3956 SQ_EXPORT_PIXEL,
3957 0,
3958 1,
3959 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
3960 GL_FALSE) )
3961 {
3962 return GL_FALSE;
3963 }
3964 }
3965 unBit = 1 << FRAG_RESULT_DEPTH;
3966 if(OutputsWritten & unBit)
3967 {
3968 if( GL_FALSE == Process_Export(pR700AsmCode,
3969 SQ_EXPORT_PIXEL,
3970 0,
3971 1,
3972 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
3973 GL_TRUE))
3974 {
3975 return GL_FALSE;
3976 }
3977 }
3978
3979 if(pR700AsmCode->cf_last_export_ptr != NULL)
3980 {
3981 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
3982 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
3983 }
3984
3985 return GL_TRUE;
3986 }
3987
3988 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
3989 GLbitfield OutputsWritten)
3990 {
3991 unsigned int unBit;
3992 unsigned int i;
3993
3994 GLuint export_starting_index = 0;
3995 GLuint export_count = pR700AsmCode->number_of_exports;
3996
3997 unBit = 1 << VERT_RESULT_HPOS;
3998 if(OutputsWritten & unBit)
3999 {
4000 if( GL_FALSE == Process_Export(pR700AsmCode,
4001 SQ_EXPORT_POS,
4002 export_starting_index,
4003 1,
4004 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
4005 GL_FALSE) )
4006 {
4007 return GL_FALSE;
4008 }
4009
4010 export_count--;
4011
4012 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4013 }
4014
4015 pR700AsmCode->number_of_exports = export_count;
4016
4017 unBit = 1 << VERT_RESULT_COL0;
4018 if(OutputsWritten & unBit)
4019 {
4020 if( GL_FALSE == Process_Export(pR700AsmCode,
4021 SQ_EXPORT_PARAM,
4022 export_starting_index,
4023 1,
4024 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
4025 GL_FALSE) )
4026 {
4027 return GL_FALSE;
4028 }
4029
4030 export_starting_index++;
4031 }
4032
4033 unBit = 1 << VERT_RESULT_COL1;
4034 if(OutputsWritten & unBit)
4035 {
4036 if( GL_FALSE == Process_Export(pR700AsmCode,
4037 SQ_EXPORT_PARAM,
4038 export_starting_index,
4039 1,
4040 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
4041 GL_FALSE) )
4042 {
4043 return GL_FALSE;
4044 }
4045
4046 export_starting_index++;
4047 }
4048
4049 unBit = 1 << VERT_RESULT_FOGC;
4050 if(OutputsWritten & unBit)
4051 {
4052 if( GL_FALSE == Process_Export(pR700AsmCode,
4053 SQ_EXPORT_PARAM,
4054 export_starting_index,
4055 1,
4056 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
4057 GL_FALSE) )
4058 {
4059 return GL_FALSE;
4060 }
4061
4062 export_starting_index++;
4063 }
4064
4065 for(i=0; i<8; i++)
4066 {
4067 unBit = 1 << (VERT_RESULT_TEX0 + i);
4068 if(OutputsWritten & unBit)
4069 {
4070 if( GL_FALSE == Process_Export(pR700AsmCode,
4071 SQ_EXPORT_PARAM,
4072 export_starting_index,
4073 1,
4074 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
4075 GL_FALSE) )
4076 {
4077 return GL_FALSE;
4078 }
4079
4080 export_starting_index++;
4081 }
4082 }
4083
4084 // At least one param should be exported
4085 if (export_count)
4086 {
4087 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4088 }
4089 else
4090 {
4091 if( GL_FALSE == Process_Export(pR700AsmCode,
4092 SQ_EXPORT_PARAM,
4093 0,
4094 1,
4095 pR700AsmCode->starting_export_register_number,
4096 GL_FALSE) )
4097 {
4098 return GL_FALSE;
4099 }
4100
4101 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
4102 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
4103 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
4104 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
4105 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4106 }
4107
4108 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4109
4110 return GL_TRUE;
4111 }
4112
4113 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
4114 {
4115 FREE(pR700AsmCode->pucOutMask);
4116 FREE(pR700AsmCode->pInstDeps);
4117 return GL_TRUE;
4118 }
4119