r600: FRAG_ATTRIB_WPOS and FRAG_ATTRIB_FOGC appear to be supported.
[mesa.git] / src / mesa / drivers / dri / r600 / r700_assembler.c
1 /*
2 * Copyright (C) 2008-2009 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 /*
23 * Authors:
24 * Richard Li <RichardZ.Li@amd.com>, <richardradeon@gmail.com>
25 */
26
27 #include <stdio.h>
28 #include <stdarg.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32
33 #include "main/mtypes.h"
34 #include "main/imports.h"
35
36 #include "radeon_debug.h"
37 #include "r600_context.h"
38
39 #include "r700_assembler.h"
40
41 BITS addrmode_PVSDST(PVSDST * pPVSDST)
42 {
43 return pPVSDST->addrmode0 | ((BITS)pPVSDST->addrmode1 << 1);
44 }
45
46 void setaddrmode_PVSDST(PVSDST * pPVSDST, BITS addrmode)
47 {
48 pPVSDST->addrmode0 = addrmode & 1;
49 pPVSDST->addrmode1 = (addrmode >> 1) & 1;
50 }
51
52 void nomask_PVSDST(PVSDST * pPVSDST)
53 {
54 pPVSDST->writex = pPVSDST->writey = pPVSDST->writez = pPVSDST->writew = 1;
55 }
56
57 BITS addrmode_PVSSRC(PVSSRC* pPVSSRC)
58 {
59 return pPVSSRC->addrmode0 | ((BITS)pPVSSRC->addrmode1 << 1);
60 }
61
62 void setaddrmode_PVSSRC(PVSSRC* pPVSSRC, BITS addrmode)
63 {
64 pPVSSRC->addrmode0 = addrmode & 1;
65 pPVSSRC->addrmode1 = (addrmode >> 1) & 1;
66 }
67
68
69 void setswizzle_PVSSRC(PVSSRC* pPVSSRC, BITS swz)
70 {
71 pPVSSRC->swizzlex =
72 pPVSSRC->swizzley =
73 pPVSSRC->swizzlez =
74 pPVSSRC->swizzlew = swz;
75 }
76
77 void noswizzle_PVSSRC(PVSSRC* pPVSSRC)
78 {
79 pPVSSRC->swizzlex = SQ_SEL_X;
80 pPVSSRC->swizzley = SQ_SEL_Y;
81 pPVSSRC->swizzlez = SQ_SEL_Z;
82 pPVSSRC->swizzlew = SQ_SEL_W;
83 }
84
85 void
86 swizzleagain_PVSSRC(PVSSRC * pPVSSRC, BITS x, BITS y, BITS z, BITS w)
87 {
88 switch (x)
89 {
90 case SQ_SEL_X: x = pPVSSRC->swizzlex;
91 break;
92 case SQ_SEL_Y: x = pPVSSRC->swizzley;
93 break;
94 case SQ_SEL_Z: x = pPVSSRC->swizzlez;
95 break;
96 case SQ_SEL_W: x = pPVSSRC->swizzlew;
97 break;
98 default:;
99 }
100
101 switch (y)
102 {
103 case SQ_SEL_X: y = pPVSSRC->swizzlex;
104 break;
105 case SQ_SEL_Y: y = pPVSSRC->swizzley;
106 break;
107 case SQ_SEL_Z: y = pPVSSRC->swizzlez;
108 break;
109 case SQ_SEL_W: y = pPVSSRC->swizzlew;
110 break;
111 default:;
112 }
113
114 switch (z)
115 {
116 case SQ_SEL_X: z = pPVSSRC->swizzlex;
117 break;
118 case SQ_SEL_Y: z = pPVSSRC->swizzley;
119 break;
120 case SQ_SEL_Z: z = pPVSSRC->swizzlez;
121 break;
122 case SQ_SEL_W: z = pPVSSRC->swizzlew;
123 break;
124 default:;
125 }
126
127 switch (w)
128 {
129 case SQ_SEL_X: w = pPVSSRC->swizzlex;
130 break;
131 case SQ_SEL_Y: w = pPVSSRC->swizzley;
132 break;
133 case SQ_SEL_Z: w = pPVSSRC->swizzlez;
134 break;
135 case SQ_SEL_W: w = pPVSSRC->swizzlew;
136 break;
137 default:;
138 }
139
140 pPVSSRC->swizzlex = x;
141 pPVSSRC->swizzley = y;
142 pPVSSRC->swizzlez = z;
143 pPVSSRC->swizzlew = w;
144 }
145
146 void neg_PVSSRC(PVSSRC* pPVSSRC)
147 {
148 pPVSSRC->negx = 1;
149 pPVSSRC->negy = 1;
150 pPVSSRC->negz = 1;
151 pPVSSRC->negw = 1;
152 }
153
154 void noneg_PVSSRC(PVSSRC* pPVSSRC)
155 {
156 pPVSSRC->negx = 0;
157 pPVSSRC->negy = 0;
158 pPVSSRC->negz = 0;
159 pPVSSRC->negw = 0;
160 }
161
162 // negate argument (for SUB instead of ADD and alike)
163 void flipneg_PVSSRC(PVSSRC* pPVSSRC)
164 {
165 pPVSSRC->negx = !pPVSSRC->negx;
166 pPVSSRC->negy = !pPVSSRC->negy;
167 pPVSSRC->negz = !pPVSSRC->negz;
168 pPVSSRC->negw = !pPVSSRC->negw;
169 }
170
171 void zerocomp_PVSSRC(PVSSRC* pPVSSRC, int c)
172 {
173 switch (c)
174 {
175 case 0: pPVSSRC->swizzlex = SQ_SEL_0; pPVSSRC->negx = 0; break;
176 case 1: pPVSSRC->swizzley = SQ_SEL_0; pPVSSRC->negy = 0; break;
177 case 2: pPVSSRC->swizzlez = SQ_SEL_0; pPVSSRC->negz = 0; break;
178 case 3: pPVSSRC->swizzlew = SQ_SEL_0; pPVSSRC->negw = 0; break;
179 default:;
180 }
181 }
182
183 void onecomp_PVSSRC(PVSSRC* pPVSSRC, int c)
184 {
185 switch (c)
186 {
187 case 0: pPVSSRC->swizzlex = SQ_SEL_1; pPVSSRC->negx = 0; break;
188 case 1: pPVSSRC->swizzley = SQ_SEL_1; pPVSSRC->negy = 0; break;
189 case 2: pPVSSRC->swizzlez = SQ_SEL_1; pPVSSRC->negz = 0; break;
190 case 3: pPVSSRC->swizzlew = SQ_SEL_1; pPVSSRC->negw = 0; break;
191 default:;
192 }
193 }
194
195 BITS is_misc_component_exported(VAP_OUT_VTX_FMT_0* pOutVTXFmt0)
196 {
197 return (pOutVTXFmt0->point_size |
198 pOutVTXFmt0->edge_flag |
199 pOutVTXFmt0->rta_index |
200 pOutVTXFmt0->kill_flag |
201 pOutVTXFmt0->viewport_index);
202 }
203
204 BITS is_depth_component_exported(OUT_FRAGMENT_FMT_0* pFPOutFmt)
205 {
206 return (pFPOutFmt->depth |
207 pFPOutFmt->stencil_ref |
208 pFPOutFmt->mask |
209 pFPOutFmt->coverage_to_mask);
210 }
211
212 GLboolean is_reduction_opcode(PVSDWORD* dest)
213 {
214 if (dest->dst.op3 == 0)
215 {
216 if ( (dest->dst.opcode == SQ_OP2_INST_DOT4 || dest->dst.opcode == SQ_OP2_INST_DOT4_IEEE || dest->dst.opcode == SQ_OP2_INST_CUBE) )
217 {
218 return GL_TRUE;
219 }
220 }
221 return GL_FALSE;
222 }
223
224 GLuint GetSurfaceFormat(GLenum eType, GLuint nChannels, GLuint * pClient_size)
225 {
226 GLuint format = FMT_INVALID;
227 GLuint uiElemSize = 0;
228
229 switch (eType)
230 {
231 case GL_BYTE:
232 case GL_UNSIGNED_BYTE:
233 uiElemSize = 1;
234 switch(nChannels)
235 {
236 case 1:
237 format = FMT_8; break;
238 case 2:
239 format = FMT_8_8; break;
240 case 3:
241 format = FMT_8_8_8; break;
242 case 4:
243 format = FMT_8_8_8_8; break;
244 default:
245 break;
246 }
247 break;
248
249 case GL_UNSIGNED_SHORT:
250 case GL_SHORT:
251 uiElemSize = 2;
252 switch(nChannels)
253 {
254 case 1:
255 format = FMT_16; break;
256 case 2:
257 format = FMT_16_16; break;
258 case 3:
259 format = FMT_16_16_16; break;
260 case 4:
261 format = FMT_16_16_16_16; break;
262 default:
263 break;
264 }
265 break;
266
267 case GL_UNSIGNED_INT:
268 case GL_INT:
269 uiElemSize = 4;
270 switch(nChannels)
271 {
272 case 1:
273 format = FMT_32; break;
274 case 2:
275 format = FMT_32_32; break;
276 case 3:
277 format = FMT_32_32_32; break;
278 case 4:
279 format = FMT_32_32_32_32; break;
280 default:
281 break;
282 }
283 break;
284
285 case GL_FLOAT:
286 uiElemSize = 4;
287 switch(nChannels)
288 {
289 case 1:
290 format = FMT_32_FLOAT; break;
291 case 2:
292 format = FMT_32_32_FLOAT; break;
293 case 3:
294 format = FMT_32_32_32_FLOAT; break;
295 case 4:
296 format = FMT_32_32_32_32_FLOAT; break;
297 default:
298 break;
299 }
300 break;
301 case GL_DOUBLE:
302 uiElemSize = 8;
303 switch(nChannels)
304 {
305 case 1:
306 format = FMT_32_FLOAT; break;
307 case 2:
308 format = FMT_32_32_FLOAT; break;
309 case 3:
310 format = FMT_32_32_32_FLOAT; break;
311 case 4:
312 format = FMT_32_32_32_32_FLOAT; break;
313 default:
314 break;
315 }
316 break;
317 default:
318 ;
319 //GL_ASSERT_NO_CASE();
320 }
321
322 if(NULL != pClient_size)
323 {
324 *pClient_size = uiElemSize * nChannels;
325 }
326
327 return(format);
328 }
329
330 unsigned int r700GetNumOperands(r700_AssemblerBase* pAsm)
331 {
332 if(pAsm->D.dst.op3)
333 {
334 return 3;
335 }
336
337 switch (pAsm->D.dst.opcode)
338 {
339 case SQ_OP2_INST_ADD:
340 case SQ_OP2_INST_KILLGT:
341 case SQ_OP2_INST_MUL:
342 case SQ_OP2_INST_MAX:
343 case SQ_OP2_INST_MIN:
344 //case SQ_OP2_INST_MAX_DX10:
345 //case SQ_OP2_INST_MIN_DX10:
346 case SQ_OP2_INST_SETGT:
347 case SQ_OP2_INST_SETGE:
348 case SQ_OP2_INST_PRED_SETE:
349 case SQ_OP2_INST_PRED_SETGT:
350 case SQ_OP2_INST_PRED_SETGE:
351 case SQ_OP2_INST_PRED_SETNE:
352 case SQ_OP2_INST_DOT4:
353 case SQ_OP2_INST_DOT4_IEEE:
354 case SQ_OP2_INST_CUBE:
355 return 2;
356
357 case SQ_OP2_INST_MOV:
358 case SQ_OP2_INST_FRACT:
359 case SQ_OP2_INST_FLOOR:
360 case SQ_OP2_INST_EXP_IEEE:
361 case SQ_OP2_INST_LOG_CLAMPED:
362 case SQ_OP2_INST_LOG_IEEE:
363 case SQ_OP2_INST_RECIP_IEEE:
364 case SQ_OP2_INST_RECIPSQRT_IEEE:
365 case SQ_OP2_INST_FLT_TO_INT:
366 case SQ_OP2_INST_SIN:
367 case SQ_OP2_INST_COS:
368 return 1;
369
370 default: radeon_error(
371 "Need instruction operand number for %x.\n", pAsm->D.dst.opcode);
372 };
373
374 return 3;
375 }
376
377 int Init_r700_AssemblerBase(SHADER_PIPE_TYPE spt, r700_AssemblerBase* pAsm, R700_Shader* pShader)
378 {
379 GLuint i;
380
381 Init_R700_Shader(pShader);
382 pAsm->pR700Shader = pShader;
383 pAsm->currentShaderType = spt;
384
385 pAsm->cf_last_export_ptr = NULL;
386
387 pAsm->cf_current_export_clause_ptr = NULL;
388 pAsm->cf_current_alu_clause_ptr = NULL;
389 pAsm->cf_current_tex_clause_ptr = NULL;
390 pAsm->cf_current_vtx_clause_ptr = NULL;
391 pAsm->cf_current_cf_clause_ptr = NULL;
392
393 // No clause has been created yet
394 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
395
396 pAsm->number_of_colorandz_exports = 0;
397 pAsm->number_of_exports = 0;
398 pAsm->number_of_export_opcodes = 0;
399
400
401 pAsm->D.bits = 0;
402 pAsm->S[0].bits = 0;
403 pAsm->S[1].bits = 0;
404 pAsm->S[2].bits = 0;
405
406 pAsm->uLastPosUpdate = 0;
407
408 *(BITS *) &pAsm->fp_stOutFmt0 = 0;
409
410 pAsm->uIIns = 0;
411 pAsm->uOIns = 0;
412 pAsm->number_used_registers = 0;
413 pAsm->uUsedConsts = 256;
414
415
416 // Fragment programs
417 pAsm->uBoolConsts = 0;
418 pAsm->uIntConsts = 0;
419 pAsm->uInsts = 0;
420 pAsm->uConsts = 0;
421
422 pAsm->FCSP = 0;
423 pAsm->fc_stack[0].type = FC_NONE;
424
425 pAsm->branch_depth = 0;
426 pAsm->max_branch_depth = 0;
427
428 pAsm->aArgSubst[0] =
429 pAsm->aArgSubst[1] =
430 pAsm->aArgSubst[2] =
431 pAsm->aArgSubst[3] = (-1);
432
433 pAsm->uOutputs = 0;
434
435 for (i=0; i<NUMBER_OF_OUTPUT_COLORS; i++)
436 {
437 pAsm->color_export_register_number[i] = (-1);
438 }
439
440
441 pAsm->depth_export_register_number = (-1);
442 pAsm->stencil_export_register_number = (-1);
443 pAsm->coverage_to_mask_export_register_number = (-1);
444 pAsm->mask_export_register_number = (-1);
445
446 pAsm->starting_export_register_number = 0;
447 pAsm->starting_vfetch_register_number = 0;
448 pAsm->starting_temp_register_number = 0;
449 pAsm->uFirstHelpReg = 0;
450
451
452 pAsm->input_position_is_used = GL_FALSE;
453 pAsm->input_normal_is_used = GL_FALSE;
454
455
456 for (i=0; i<NUMBER_OF_INPUT_COLORS; i++)
457 {
458 pAsm->input_color_is_used[ i ] = GL_FALSE;
459 }
460
461 for (i=0; i<NUMBER_OF_TEXTURE_UNITS; i++)
462 {
463 pAsm->input_texture_unit_is_used[ i ] = GL_FALSE;
464 }
465
466 for (i=0; i<VERT_ATTRIB_MAX; i++)
467 {
468 pAsm->vfetch_instruction_ptr_array[ i ] = NULL;
469 }
470
471 pAsm->number_of_inputs = 0;
472
473 pAsm->is_tex = GL_FALSE;
474 pAsm->need_tex_barrier = GL_FALSE;
475
476 return 0;
477 }
478
479 GLboolean IsTex(gl_inst_opcode Opcode)
480 {
481 if( (OPCODE_TEX==Opcode) || (OPCODE_TXP==Opcode) || (OPCODE_TXB==Opcode) )
482 {
483 return GL_TRUE;
484 }
485 return GL_FALSE;
486 }
487
488 GLboolean IsAlu(gl_inst_opcode Opcode)
489 {
490 //TODO : more for fc and ex for higher spec.
491 if( IsTex(Opcode) )
492 {
493 return GL_FALSE;
494 }
495 return GL_TRUE;
496 }
497
498 int check_current_clause(r700_AssemblerBase* pAsm,
499 CF_CLAUSE_TYPE new_clause_type)
500 {
501 if (pAsm->cf_current_clause_type != new_clause_type)
502 { //Close last open clause
503 switch (pAsm->cf_current_clause_type)
504 {
505 case CF_ALU_CLAUSE:
506 if ( pAsm->cf_current_alu_clause_ptr != NULL)
507 {
508 pAsm->cf_current_alu_clause_ptr = NULL;
509 }
510 break;
511 case CF_VTX_CLAUSE:
512 if ( pAsm->cf_current_vtx_clause_ptr != NULL)
513 {
514 pAsm->cf_current_vtx_clause_ptr = NULL;
515 }
516 break;
517 case CF_TEX_CLAUSE:
518 if ( pAsm->cf_current_tex_clause_ptr != NULL)
519 {
520 pAsm->cf_current_tex_clause_ptr = NULL;
521 }
522 break;
523 case CF_EXPORT_CLAUSE:
524 if ( pAsm->cf_current_export_clause_ptr != NULL)
525 {
526 pAsm->cf_current_export_clause_ptr = NULL;
527 }
528 break;
529 case CF_OTHER_CLAUSE:
530 if ( pAsm->cf_current_cf_clause_ptr != NULL)
531 {
532 pAsm->cf_current_cf_clause_ptr = NULL;
533 }
534 break;
535 case CF_EMPTY_CLAUSE:
536 break;
537 default:
538 radeon_error(
539 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
540 return GL_FALSE;
541 }
542
543 pAsm->cf_current_clause_type = CF_EMPTY_CLAUSE;
544
545 // Create new clause
546 switch (new_clause_type)
547 {
548 case CF_ALU_CLAUSE:
549 pAsm->cf_current_clause_type = CF_ALU_CLAUSE;
550 break;
551 case CF_VTX_CLAUSE:
552 pAsm->cf_current_clause_type = CF_VTX_CLAUSE;
553 break;
554 case CF_TEX_CLAUSE:
555 pAsm->cf_current_clause_type = CF_TEX_CLAUSE;
556 break;
557 case CF_EXPORT_CLAUSE:
558 {
559 R700ControlFlowSXClause* pR700ControlFlowSXClause
560 = (R700ControlFlowSXClause*) CALLOC_STRUCT(R700ControlFlowSXClause);
561
562 // Add new export instruction to control flow program
563 if (pR700ControlFlowSXClause != 0)
564 {
565 pAsm->cf_current_export_clause_ptr = pR700ControlFlowSXClause;
566 Init_R700ControlFlowSXClause(pR700ControlFlowSXClause);
567 AddCFInstruction( pAsm->pR700Shader,
568 (R700ControlFlowInstruction *)pR700ControlFlowSXClause );
569 }
570 else
571 {
572 radeon_error(
573 "Error allocating new EXPORT CF instruction in check_current_clause. \n");
574 return GL_FALSE;
575 }
576 pAsm->cf_current_clause_type = CF_EXPORT_CLAUSE;
577 }
578 break;
579 case CF_EMPTY_CLAUSE:
580 break;
581 case CF_OTHER_CLAUSE:
582 pAsm->cf_current_clause_type = CF_OTHER_CLAUSE;
583 break;
584 default:
585 radeon_error(
586 "Unknown CF_CLAUSE_TYPE (%d) in check_current_clause. \n", (int) new_clause_type);
587 return GL_FALSE;
588 }
589 }
590
591 return GL_TRUE;
592 }
593
594 GLboolean add_vfetch_instruction(r700_AssemblerBase* pAsm,
595 R700VertexInstruction* vertex_instruction_ptr)
596 {
597 if( GL_FALSE == check_current_clause(pAsm, CF_VTX_CLAUSE) )
598 {
599 return GL_FALSE;
600 }
601
602 if( pAsm->cf_current_vtx_clause_ptr == NULL ||
603 ( (pAsm->cf_current_vtx_clause_ptr != NULL) &&
604 (pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_vtx_clause_ptr->m_ShaderInstType)-1)
605 ) )
606 {
607 // Create new Vfetch control flow instruction for this new clause
608 pAsm->cf_current_vtx_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
609
610 if (pAsm->cf_current_vtx_clause_ptr != NULL)
611 {
612 Init_R700ControlFlowGenericClause(pAsm->cf_current_vtx_clause_ptr);
613 AddCFInstruction( pAsm->pR700Shader,
614 (R700ControlFlowInstruction *)pAsm->cf_current_vtx_clause_ptr );
615 }
616 else
617 {
618 radeon_error("Could not allocate a new VFetch CF instruction.\n");
619 return GL_FALSE;
620 }
621
622 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.pop_count = 0x0;
623 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_const = 0x0;
624 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
625 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count = 0x0;
626 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.end_of_program = 0x0;
627 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
628 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_VTX;
629 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
630 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.barrier = 0x1;
631
632 LinkVertexInstruction(pAsm->cf_current_vtx_clause_ptr, vertex_instruction_ptr );
633 }
634 else
635 {
636 pAsm->cf_current_vtx_clause_ptr->m_Word1.f.count++;
637 }
638
639 AddVTXInstruction(pAsm->pR700Shader, vertex_instruction_ptr);
640
641 return GL_TRUE;
642 }
643
644 GLboolean add_tex_instruction(r700_AssemblerBase* pAsm,
645 R700TextureInstruction* tex_instruction_ptr)
646 {
647 if ( GL_FALSE == check_current_clause(pAsm, CF_TEX_CLAUSE) )
648 {
649 return GL_FALSE;
650 }
651
652 if ( pAsm->cf_current_tex_clause_ptr == NULL ||
653 ( (pAsm->cf_current_tex_clause_ptr != NULL) &&
654 (pAsm->cf_current_tex_clause_ptr->m_Word1.f.count >= GetCFMaxInstructions(pAsm->cf_current_tex_clause_ptr->m_ShaderInstType)-1)
655 ) )
656 {
657 // new tex cf instruction for this new clause
658 pAsm->cf_current_tex_clause_ptr = (R700ControlFlowGenericClause*) CALLOC_STRUCT(R700ControlFlowGenericClause);
659
660 if (pAsm->cf_current_tex_clause_ptr != NULL)
661 {
662 Init_R700ControlFlowGenericClause(pAsm->cf_current_tex_clause_ptr);
663 AddCFInstruction( pAsm->pR700Shader,
664 (R700ControlFlowInstruction *)pAsm->cf_current_tex_clause_ptr );
665 }
666 else
667 {
668 radeon_error("Could not allocate a new TEX CF instruction.\n");
669 return GL_FALSE;
670 }
671
672 pAsm->cf_current_tex_clause_ptr->m_Word1.f.pop_count = 0x0;
673 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_const = 0x0;
674 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cond = SQ_CF_COND_ACTIVE;
675
676 pAsm->cf_current_tex_clause_ptr->m_Word1.f.end_of_program = 0x0;
677 pAsm->cf_current_tex_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
678 pAsm->cf_current_tex_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_TEX;
679 pAsm->cf_current_tex_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
680 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x0; //0x1;
681 }
682 else
683 {
684 pAsm->cf_current_tex_clause_ptr->m_Word1.f.count++;
685 }
686
687 // If this clause constains any TEX instruction that is dependent on a previous instruction,
688 // set the barrier bit
689 if( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) || pAsm->need_tex_barrier == GL_TRUE )
690 {
691 pAsm->cf_current_tex_clause_ptr->m_Word1.f.barrier = 0x1;
692 }
693
694 if(NULL == pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction)
695 {
696 pAsm->cf_current_tex_clause_ptr->m_pLinkedTEXInstruction = tex_instruction_ptr;
697 tex_instruction_ptr->m_pLinkedGenericClause = pAsm->cf_current_tex_clause_ptr;
698 }
699
700 AddTEXInstruction(pAsm->pR700Shader, tex_instruction_ptr);
701
702 return GL_TRUE;
703 }
704
705 GLboolean assemble_vfetch_instruction(r700_AssemblerBase* pAsm,
706 GLuint gl_client_id,
707 GLuint destination_register,
708 GLuint number_of_elements,
709 GLenum dataElementType,
710 VTX_FETCH_METHOD* pFetchMethod)
711 {
712 GLuint client_size_inbyte;
713 GLuint data_format;
714 GLuint mega_fetch_count;
715 GLuint is_mega_fetch_flag;
716
717 R700VertexGenericFetch* vfetch_instruction_ptr;
718 R700VertexGenericFetch* assembled_vfetch_instruction_ptr = pAsm->vfetch_instruction_ptr_array[ gl_client_id ];
719
720 if (assembled_vfetch_instruction_ptr == NULL)
721 {
722 vfetch_instruction_ptr = (R700VertexGenericFetch*) CALLOC_STRUCT(R700VertexGenericFetch);
723 if (vfetch_instruction_ptr == NULL)
724 {
725 return GL_FALSE;
726 }
727 Init_R700VertexGenericFetch(vfetch_instruction_ptr);
728 }
729 else
730 {
731 vfetch_instruction_ptr = assembled_vfetch_instruction_ptr;
732 }
733
734 data_format = GetSurfaceFormat(dataElementType, number_of_elements, &client_size_inbyte);
735
736 if(GL_TRUE == pFetchMethod->bEnableMini) //More conditions here
737 {
738 //TODO : mini fetch
739 }
740 else
741 {
742 mega_fetch_count = MEGA_FETCH_BYTES - 1;
743 is_mega_fetch_flag = 0x1;
744 pFetchMethod->mega_fetch_remainder = MEGA_FETCH_BYTES - client_size_inbyte;
745 }
746
747 vfetch_instruction_ptr->m_Word0.f.vtx_inst = SQ_VTX_INST_FETCH;
748 vfetch_instruction_ptr->m_Word0.f.fetch_type = SQ_VTX_FETCH_VERTEX_DATA;
749 vfetch_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
750
751 vfetch_instruction_ptr->m_Word0.f.buffer_id = gl_client_id;
752 vfetch_instruction_ptr->m_Word0.f.src_gpr = 0x0;
753 vfetch_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
754 vfetch_instruction_ptr->m_Word0.f.src_sel_x = SQ_SEL_X;
755 vfetch_instruction_ptr->m_Word0.f.mega_fetch_count = mega_fetch_count;
756
757 vfetch_instruction_ptr->m_Word1.f.dst_sel_x = (number_of_elements < 1) ? SQ_SEL_0 : SQ_SEL_X;
758 vfetch_instruction_ptr->m_Word1.f.dst_sel_y = (number_of_elements < 2) ? SQ_SEL_0 : SQ_SEL_Y;
759 vfetch_instruction_ptr->m_Word1.f.dst_sel_z = (number_of_elements < 3) ? SQ_SEL_0 : SQ_SEL_Z;
760 vfetch_instruction_ptr->m_Word1.f.dst_sel_w = (number_of_elements < 4) ? SQ_SEL_1 : SQ_SEL_W;
761
762 vfetch_instruction_ptr->m_Word1.f.use_const_fields = 1;
763
764 // Destination register
765 vfetch_instruction_ptr->m_Word1_GPR.f.dst_gpr = destination_register;
766 vfetch_instruction_ptr->m_Word1_GPR.f.dst_rel = SQ_ABSOLUTE;
767
768 vfetch_instruction_ptr->m_Word2.f.offset = 0;
769 vfetch_instruction_ptr->m_Word2.f.const_buf_no_stride = 0x0;
770
771 vfetch_instruction_ptr->m_Word2.f.mega_fetch = is_mega_fetch_flag;
772
773 if (assembled_vfetch_instruction_ptr == NULL)
774 {
775 if ( GL_FALSE == add_vfetch_instruction(pAsm, (R700VertexInstruction *)vfetch_instruction_ptr) )
776 {
777 return GL_FALSE;
778 }
779
780 if (pAsm->vfetch_instruction_ptr_array[ gl_client_id ] != NULL)
781 {
782 return GL_FALSE;
783 }
784 else
785 {
786 pAsm->vfetch_instruction_ptr_array[ gl_client_id ] = vfetch_instruction_ptr;
787 }
788 }
789
790 return GL_TRUE;
791 }
792
793 GLuint gethelpr(r700_AssemblerBase* pAsm)
794 {
795 GLuint r = pAsm->uHelpReg;
796 pAsm->uHelpReg++;
797 if (pAsm->uHelpReg > pAsm->number_used_registers)
798 {
799 pAsm->number_used_registers = pAsm->uHelpReg;
800 }
801 return r;
802 }
803 void resethelpr(r700_AssemblerBase* pAsm)
804 {
805 pAsm->uHelpReg = pAsm->uFirstHelpReg;
806 }
807
808 void checkop_init(r700_AssemblerBase* pAsm)
809 {
810 resethelpr(pAsm);
811 pAsm->aArgSubst[0] =
812 pAsm->aArgSubst[1] =
813 pAsm->aArgSubst[2] =
814 pAsm->aArgSubst[3] = -1;
815 }
816
817 GLboolean mov_temp(r700_AssemblerBase* pAsm, int src)
818 {
819 GLuint tmp = gethelpr(pAsm);
820
821 //mov src to temp helper gpr.
822 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
823
824 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
825
826 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
827 pAsm->D.dst.reg = tmp;
828
829 nomask_PVSDST(&(pAsm->D.dst));
830
831 if( GL_FALSE == assemble_src(pAsm, src, 0) )
832 {
833 return GL_FALSE;
834 }
835
836 noswizzle_PVSSRC(&(pAsm->S[0].src));
837 noneg_PVSSRC(&(pAsm->S[0].src));
838
839 if( GL_FALSE == next_ins(pAsm) )
840 {
841 return GL_FALSE;
842 }
843
844 pAsm->aArgSubst[1 + src] = tmp;
845
846 return GL_TRUE;
847 }
848
849 GLboolean checkop1(r700_AssemblerBase* pAsm)
850 {
851 checkop_init(pAsm);
852 return GL_TRUE;
853 }
854
855 GLboolean checkop2(r700_AssemblerBase* pAsm)
856 {
857 GLboolean bSrcConst[2];
858 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
859
860 checkop_init(pAsm);
861
862 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
863 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
864 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
865 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
866 {
867 bSrcConst[0] = GL_TRUE;
868 }
869 else
870 {
871 bSrcConst[0] = GL_FALSE;
872 }
873 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
874 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
875 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
876 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
877 {
878 bSrcConst[1] = GL_TRUE;
879 }
880 else
881 {
882 bSrcConst[1] = GL_FALSE;
883 }
884
885 if( (bSrcConst[0] == GL_TRUE) && (bSrcConst[1] == GL_TRUE) )
886 {
887 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
888 {
889 if( GL_FALSE == mov_temp(pAsm, 1) )
890 {
891 return GL_FALSE;
892 }
893 }
894 }
895
896 return GL_TRUE;
897 }
898
899 GLboolean checkop3(r700_AssemblerBase* pAsm)
900 {
901 GLboolean bSrcConst[3];
902 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
903
904 checkop_init(pAsm);
905
906 if( (pILInst->SrcReg[0].File == PROGRAM_CONSTANT) ||
907 (pILInst->SrcReg[0].File == PROGRAM_LOCAL_PARAM) ||
908 (pILInst->SrcReg[0].File == PROGRAM_ENV_PARAM) ||
909 (pILInst->SrcReg[0].File == PROGRAM_STATE_VAR) )
910 {
911 bSrcConst[0] = GL_TRUE;
912 }
913 else
914 {
915 bSrcConst[0] = GL_FALSE;
916 }
917 if( (pILInst->SrcReg[1].File == PROGRAM_CONSTANT) ||
918 (pILInst->SrcReg[1].File == PROGRAM_LOCAL_PARAM) ||
919 (pILInst->SrcReg[1].File == PROGRAM_ENV_PARAM) ||
920 (pILInst->SrcReg[1].File == PROGRAM_STATE_VAR) )
921 {
922 bSrcConst[1] = GL_TRUE;
923 }
924 else
925 {
926 bSrcConst[1] = GL_FALSE;
927 }
928 if( (pILInst->SrcReg[2].File == PROGRAM_CONSTANT) ||
929 (pILInst->SrcReg[2].File == PROGRAM_LOCAL_PARAM) ||
930 (pILInst->SrcReg[2].File == PROGRAM_ENV_PARAM) ||
931 (pILInst->SrcReg[2].File == PROGRAM_STATE_VAR) )
932 {
933 bSrcConst[2] = GL_TRUE;
934 }
935 else
936 {
937 bSrcConst[2] = GL_FALSE;
938 }
939
940 if( (GL_TRUE == bSrcConst[0]) &&
941 (GL_TRUE == bSrcConst[1]) &&
942 (GL_TRUE == bSrcConst[2]) )
943 {
944 if( GL_FALSE == mov_temp(pAsm, 1) )
945 {
946 return GL_FALSE;
947 }
948 if( GL_FALSE == mov_temp(pAsm, 2) )
949 {
950 return GL_FALSE;
951 }
952
953 return GL_TRUE;
954 }
955 else if( (GL_TRUE == bSrcConst[0]) &&
956 (GL_TRUE == bSrcConst[1]) )
957 {
958 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[1].Index)
959 {
960 if( GL_FALSE == mov_temp(pAsm, 1) )
961 {
962 return 1;
963 }
964 }
965
966 return GL_TRUE;
967 }
968 else if ( (GL_TRUE == bSrcConst[0]) &&
969 (GL_TRUE == bSrcConst[2]) )
970 {
971 if(pILInst->SrcReg[0].Index != pILInst->SrcReg[2].Index)
972 {
973 if( GL_FALSE == mov_temp(pAsm, 2) )
974 {
975 return GL_FALSE;
976 }
977 }
978
979 return GL_TRUE;
980 }
981 else if( (GL_TRUE == bSrcConst[1]) &&
982 (GL_TRUE == bSrcConst[2]) )
983 {
984 if(pILInst->SrcReg[1].Index != pILInst->SrcReg[2].Index)
985 {
986 if( GL_FALSE == mov_temp(pAsm, 2) )
987 {
988 return GL_FALSE;
989 }
990 }
991
992 return GL_TRUE;
993 }
994
995 return GL_TRUE;
996 }
997
998 GLboolean assemble_src(r700_AssemblerBase *pAsm,
999 int src,
1000 int fld)
1001 {
1002 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1003
1004 if (fld == -1)
1005 {
1006 fld = src;
1007 }
1008
1009 if(pAsm->aArgSubst[1+src] >= 0)
1010 {
1011 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1012 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1013 pAsm->S[fld].src.reg = pAsm->aArgSubst[1+src];
1014 }
1015 else
1016 {
1017 switch (pILInst->SrcReg[src].File)
1018 {
1019 case PROGRAM_TEMPORARY:
1020 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1021 pAsm->S[fld].src.rtype = SRC_REG_TEMPORARY;
1022 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index + pAsm->starting_temp_register_number;
1023 break;
1024 case PROGRAM_CONSTANT:
1025 case PROGRAM_LOCAL_PARAM:
1026 case PROGRAM_ENV_PARAM:
1027 case PROGRAM_STATE_VAR:
1028 if (1 == pILInst->SrcReg[src].RelAddr)
1029 {
1030 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_RELATIVE_A0);
1031 }
1032 else
1033 {
1034 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1035 }
1036
1037 pAsm->S[fld].src.rtype = SRC_REG_CONSTANT;
1038 pAsm->S[fld].src.reg = pILInst->SrcReg[src].Index;
1039 break;
1040 case PROGRAM_INPUT:
1041 setaddrmode_PVSSRC(&(pAsm->S[fld].src), ADDR_ABSOLUTE);
1042 pAsm->S[fld].src.rtype = SRC_REG_INPUT;
1043 switch (pAsm->currentShaderType)
1044 {
1045 case SPT_FP:
1046 pAsm->S[fld].src.reg = pAsm->uiFP_AttributeMap[pILInst->SrcReg[src].Index];
1047 break;
1048 case SPT_VP:
1049 pAsm->S[fld].src.reg = pAsm->ucVP_AttributeMap[pILInst->SrcReg[src].Index];
1050 break;
1051 }
1052 break;
1053 default:
1054 radeon_error("Invalid source argument type\n");
1055 return GL_FALSE;
1056 }
1057 }
1058
1059 pAsm->S[fld].src.swizzlex = pILInst->SrcReg[src].Swizzle & 0x7;
1060 pAsm->S[fld].src.swizzley = (pILInst->SrcReg[src].Swizzle >> 3) & 0x7;
1061 pAsm->S[fld].src.swizzlez = (pILInst->SrcReg[src].Swizzle >> 6) & 0x7;
1062 pAsm->S[fld].src.swizzlew = (pILInst->SrcReg[src].Swizzle >> 9) & 0x7;
1063
1064 pAsm->S[fld].src.negx = pILInst->SrcReg[src].Negate & 0x1;
1065 pAsm->S[fld].src.negy = (pILInst->SrcReg[src].Negate >> 1) & 0x1;
1066 pAsm->S[fld].src.negz = (pILInst->SrcReg[src].Negate >> 2) & 0x1;
1067 pAsm->S[fld].src.negw = (pILInst->SrcReg[src].Negate >> 3) & 0x1;
1068
1069 return GL_TRUE;
1070 }
1071
1072 GLboolean assemble_dst(r700_AssemblerBase *pAsm)
1073 {
1074 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1075 switch (pILInst->DstReg.File)
1076 {
1077 case PROGRAM_TEMPORARY:
1078 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1079 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1080 pAsm->D.dst.reg = pILInst->DstReg.Index + pAsm->starting_temp_register_number;
1081 break;
1082 case PROGRAM_ADDRESS:
1083 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1084 pAsm->D.dst.rtype = DST_REG_A0;
1085 pAsm->D.dst.reg = 0;
1086 break;
1087 case PROGRAM_OUTPUT:
1088 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1089 pAsm->D.dst.rtype = DST_REG_OUT;
1090 switch (pAsm->currentShaderType)
1091 {
1092 case SPT_FP:
1093 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1094 break;
1095 case SPT_VP:
1096 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1097 break;
1098 }
1099 break;
1100 default:
1101 radeon_error("Invalid destination output argument type\n");
1102 return GL_FALSE;
1103 }
1104
1105 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1106 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1107 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1108 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1109
1110 return GL_TRUE;
1111 }
1112
1113 GLboolean tex_dst(r700_AssemblerBase *pAsm)
1114 {
1115 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1116
1117 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
1118 {
1119 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
1120 pAsm->D.dst.reg = pAsm->pILInst[pAsm->uiCurInst].DstReg.Index + pAsm->starting_temp_register_number;
1121
1122 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1123 }
1124 else if(PROGRAM_OUTPUT == pILInst->DstReg.File)
1125 {
1126 pAsm->D.dst.rtype = DST_REG_OUT;
1127 switch (pAsm->currentShaderType)
1128 {
1129 case SPT_FP:
1130 pAsm->D.dst.reg = pAsm->uiFP_OutputMap[pILInst->DstReg.Index];
1131 break;
1132 case SPT_VP:
1133 pAsm->D.dst.reg = pAsm->ucVP_OutputMap[pILInst->DstReg.Index];
1134 break;
1135 }
1136
1137 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
1138 }
1139 else
1140 {
1141 radeon_error("Invalid destination output argument type\n");
1142 return GL_FALSE;
1143 }
1144
1145 pAsm->D.dst.writex = pILInst->DstReg.WriteMask & 0x1;
1146 pAsm->D.dst.writey = (pILInst->DstReg.WriteMask >> 1) & 0x1;
1147 pAsm->D.dst.writez = (pILInst->DstReg.WriteMask >> 2) & 0x1;
1148 pAsm->D.dst.writew = (pILInst->DstReg.WriteMask >> 3) & 0x1;
1149
1150 return GL_TRUE;
1151 }
1152
1153 GLboolean tex_src(r700_AssemblerBase *pAsm)
1154 {
1155 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
1156
1157 GLboolean bValidTexCoord = GL_FALSE;
1158
1159 if(pAsm->aArgSubst[1] >= 0)
1160 {
1161 bValidTexCoord = GL_TRUE;
1162 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1163 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1164 pAsm->S[0].src.reg = pAsm->aArgSubst[1];
1165 }
1166 else
1167 {
1168 switch (pILInst->SrcReg[0].File) {
1169 case PROGRAM_CONSTANT:
1170 case PROGRAM_LOCAL_PARAM:
1171 case PROGRAM_ENV_PARAM:
1172 case PROGRAM_STATE_VAR:
1173 break;
1174 case PROGRAM_TEMPORARY:
1175 bValidTexCoord = GL_TRUE;
1176 pAsm->S[0].src.reg = pILInst->SrcReg[0].Index +
1177 pAsm->starting_temp_register_number;
1178 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
1179 break;
1180 case PROGRAM_INPUT:
1181 switch (pILInst->SrcReg[0].Index)
1182 {
1183 case FRAG_ATTRIB_WPOS:
1184 case FRAG_ATTRIB_COL0:
1185 case FRAG_ATTRIB_COL1:
1186 case FRAG_ATTRIB_FOGC:
1187 case FRAG_ATTRIB_TEX0:
1188 case FRAG_ATTRIB_TEX1:
1189 case FRAG_ATTRIB_TEX2:
1190 case FRAG_ATTRIB_TEX3:
1191 case FRAG_ATTRIB_TEX4:
1192 case FRAG_ATTRIB_TEX5:
1193 case FRAG_ATTRIB_TEX6:
1194 case FRAG_ATTRIB_TEX7:
1195 bValidTexCoord = GL_TRUE;
1196 pAsm->S[0].src.reg =
1197 pAsm->uiFP_AttributeMap[pILInst->SrcReg[0].Index];
1198 pAsm->S[0].src.rtype = SRC_REG_INPUT;
1199 break;
1200 case FRAG_ATTRIB_FACE:
1201 fprintf(stderr, "FRAG_ATTRIB_FACE unsupported\n");
1202 break;
1203 case FRAG_ATTRIB_PNTC:
1204 fprintf(stderr, "FRAG_ATTRIB_PNTC unsupported\n");
1205 break;
1206 case FRAG_ATTRIB_VAR0:
1207 fprintf(stderr, "FRAG_ATTRIB_VAR0 unsupported\n");
1208 break;
1209 }
1210 break;
1211 }
1212 }
1213
1214 if(GL_TRUE == bValidTexCoord)
1215 {
1216 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
1217 }
1218 else
1219 {
1220 radeon_error("Invalid source texcoord for TEX instruction\n");
1221 return GL_FALSE;
1222 }
1223
1224 pAsm->S[0].src.swizzlex = pILInst->SrcReg[0].Swizzle & 0x7;
1225 pAsm->S[0].src.swizzley = (pILInst->SrcReg[0].Swizzle >> 3) & 0x7;
1226 pAsm->S[0].src.swizzlez = (pILInst->SrcReg[0].Swizzle >> 6) & 0x7;
1227 pAsm->S[0].src.swizzlew = (pILInst->SrcReg[0].Swizzle >> 9) & 0x7;
1228
1229 pAsm->S[0].src.negx = pILInst->SrcReg[0].Negate & 0x1;
1230 pAsm->S[0].src.negy = (pILInst->SrcReg[0].Negate >> 1) & 0x1;
1231 pAsm->S[0].src.negz = (pILInst->SrcReg[0].Negate >> 2) & 0x1;
1232 pAsm->S[0].src.negw = (pILInst->SrcReg[0].Negate >> 3) & 0x1;
1233
1234 return GL_TRUE;
1235 }
1236
1237 GLboolean assemble_tex_instruction(r700_AssemblerBase *pAsm, GLboolean normalized)
1238 {
1239 PVSSRC * texture_coordinate_source;
1240 PVSSRC * texture_unit_source;
1241
1242 R700TextureInstruction* tex_instruction_ptr = (R700TextureInstruction*) CALLOC_STRUCT(R700TextureInstruction);
1243 if (tex_instruction_ptr == NULL)
1244 {
1245 return GL_FALSE;
1246 }
1247 Init_R700TextureInstruction(tex_instruction_ptr);
1248
1249 texture_coordinate_source = &(pAsm->S[0].src);
1250 texture_unit_source = &(pAsm->S[1].src);
1251
1252 tex_instruction_ptr->m_Word0.f.tex_inst = pAsm->D.dst.opcode;
1253 tex_instruction_ptr->m_Word0.f.bc_frac_mode = 0x0;
1254 tex_instruction_ptr->m_Word0.f.fetch_whole_quad = 0x0;
1255
1256 tex_instruction_ptr->m_Word0.f.resource_id = texture_unit_source->reg;
1257
1258 tex_instruction_ptr->m_Word1.f.lod_bias = 0x0;
1259 if (normalized) {
1260 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_NORMALIZED;
1261 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_NORMALIZED;
1262 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_NORMALIZED;
1263 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_NORMALIZED;
1264 } else {
1265 /* XXX: UNNORMALIZED tex coords have limited wrap modes */
1266 tex_instruction_ptr->m_Word1.f.coord_type_x = SQ_TEX_UNNORMALIZED;
1267 tex_instruction_ptr->m_Word1.f.coord_type_y = SQ_TEX_UNNORMALIZED;
1268 tex_instruction_ptr->m_Word1.f.coord_type_z = SQ_TEX_UNNORMALIZED;
1269 tex_instruction_ptr->m_Word1.f.coord_type_w = SQ_TEX_UNNORMALIZED;
1270 }
1271
1272 tex_instruction_ptr->m_Word2.f.offset_x = 0x0;
1273 tex_instruction_ptr->m_Word2.f.offset_y = 0x0;
1274 tex_instruction_ptr->m_Word2.f.offset_z = 0x0;
1275
1276 tex_instruction_ptr->m_Word2.f.sampler_id = texture_unit_source->reg;
1277
1278 // dst
1279 if ( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
1280 (pAsm->D.dst.rtype == DST_REG_OUT) )
1281 {
1282 tex_instruction_ptr->m_Word0.f.src_gpr = texture_coordinate_source->reg;
1283 tex_instruction_ptr->m_Word0.f.src_rel = SQ_ABSOLUTE;
1284
1285 tex_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
1286 tex_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE;
1287
1288 tex_instruction_ptr->m_Word1.f.dst_sel_x = (pAsm->D.dst.writex ? texture_unit_source->swizzlex : SQ_SEL_MASK);
1289 tex_instruction_ptr->m_Word1.f.dst_sel_y = (pAsm->D.dst.writey ? texture_unit_source->swizzley : SQ_SEL_MASK);
1290 tex_instruction_ptr->m_Word1.f.dst_sel_z = (pAsm->D.dst.writez ? texture_unit_source->swizzlez : SQ_SEL_MASK);
1291 tex_instruction_ptr->m_Word1.f.dst_sel_w = (pAsm->D.dst.writew ? texture_unit_source->swizzlew : SQ_SEL_MASK);
1292
1293
1294 tex_instruction_ptr->m_Word2.f.src_sel_x = texture_coordinate_source->swizzlex;
1295 tex_instruction_ptr->m_Word2.f.src_sel_y = texture_coordinate_source->swizzley;
1296 tex_instruction_ptr->m_Word2.f.src_sel_z = texture_coordinate_source->swizzlez;
1297 tex_instruction_ptr->m_Word2.f.src_sel_w = texture_coordinate_source->swizzlew;
1298 }
1299 else
1300 {
1301 radeon_error("Only temp destination registers supported for TEX dest regs.\n");
1302 return GL_FALSE;
1303 }
1304
1305 if( GL_FALSE == add_tex_instruction(pAsm, tex_instruction_ptr) )
1306 {
1307 return GL_FALSE;
1308 }
1309
1310 return GL_TRUE;
1311 }
1312
1313 void initialize(r700_AssemblerBase *pAsm)
1314 {
1315 GLuint cycle, component;
1316
1317 for (cycle=0; cycle<NUMBER_OF_CYCLES; cycle++)
1318 {
1319 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1320 {
1321 pAsm->hw_gpr[cycle][component] = (-1);
1322 }
1323 }
1324 for (component=0; component<NUMBER_OF_COMPONENTS; component++)
1325 {
1326 pAsm->hw_cfile_addr[component] = (-1);
1327 pAsm->hw_cfile_chan[component] = (-1);
1328 }
1329 }
1330
1331 GLboolean assemble_alu_src(R700ALUInstruction* alu_instruction_ptr,
1332 int source_index,
1333 PVSSRC* pSource,
1334 BITS scalar_channel_index)
1335 {
1336 BITS src_sel;
1337 BITS src_rel;
1338 BITS src_chan;
1339 BITS src_neg;
1340
1341 //--------------------------------------------------------------------------
1342 // Source for operands src0, src1.
1343 // Values [0,127] correspond to GPR[0..127].
1344 // Values [256,511] correspond to cfile constants c[0..255].
1345
1346 //--------------------------------------------------------------------------
1347 // Other special values are shown in the list below.
1348
1349 // 248 SQ_ALU_SRC_0: special constant 0.0.
1350 // 249 SQ_ALU_SRC_1: special constant 1.0 float.
1351
1352 // 250 SQ_ALU_SRC_1_INT: special constant 1 integer.
1353 // 251 SQ_ALU_SRC_M_1_INT: special constant -1 integer.
1354
1355 // 252 SQ_ALU_SRC_0_5: special constant 0.5 float.
1356 // 253 SQ_ALU_SRC_LITERAL: literal constant.
1357
1358 // 254 SQ_ALU_SRC_PV: previous vector result.
1359 // 255 SQ_ALU_SRC_PS: previous scalar result.
1360 //--------------------------------------------------------------------------
1361
1362 BITS channel_swizzle;
1363 switch (scalar_channel_index)
1364 {
1365 case 0: channel_swizzle = pSource->swizzlex; break;
1366 case 1: channel_swizzle = pSource->swizzley; break;
1367 case 2: channel_swizzle = pSource->swizzlez; break;
1368 case 3: channel_swizzle = pSource->swizzlew; break;
1369 default: channel_swizzle = SQ_SEL_MASK; break;
1370 }
1371
1372 if(channel_swizzle == SQ_SEL_0)
1373 {
1374 src_sel = SQ_ALU_SRC_0;
1375 }
1376 else if (channel_swizzle == SQ_SEL_1)
1377 {
1378 src_sel = SQ_ALU_SRC_1;
1379 }
1380 else
1381 {
1382 if ( (pSource->rtype == SRC_REG_TEMPORARY) ||
1383 (pSource->rtype == SRC_REG_INPUT)
1384 )
1385 {
1386 src_sel = pSource->reg;
1387 }
1388 else if (pSource->rtype == SRC_REG_CONSTANT)
1389 {
1390 src_sel = pSource->reg + CFILE_REGISTER_OFFSET;
1391 }
1392 else
1393 {
1394 radeon_error("Source (%d) register type (%d) not one of TEMP, INPUT, or CONSTANT.\n",
1395 source_index, pSource->rtype);
1396 return GL_FALSE;
1397 }
1398 }
1399
1400 if( ADDR_ABSOLUTE == addrmode_PVSSRC(pSource) )
1401 {
1402 src_rel = SQ_ABSOLUTE;
1403 }
1404 else
1405 {
1406 src_rel = SQ_RELATIVE;
1407 }
1408
1409 switch (channel_swizzle)
1410 {
1411 case SQ_SEL_X:
1412 src_chan = SQ_CHAN_X;
1413 break;
1414 case SQ_SEL_Y:
1415 src_chan = SQ_CHAN_Y;
1416 break;
1417 case SQ_SEL_Z:
1418 src_chan = SQ_CHAN_Z;
1419 break;
1420 case SQ_SEL_W:
1421 src_chan = SQ_CHAN_W;
1422 break;
1423 case SQ_SEL_0:
1424 case SQ_SEL_1:
1425 // Does not matter since src_sel controls
1426 src_chan = SQ_CHAN_X;
1427 break;
1428 default:
1429 radeon_error("Unknown source select value (%d) in assemble_alu_src().\n", channel_swizzle);
1430 return GL_FALSE;
1431 break;
1432 }
1433
1434 switch (scalar_channel_index)
1435 {
1436 case 0: src_neg = pSource->negx; break;
1437 case 1: src_neg = pSource->negy; break;
1438 case 2: src_neg = pSource->negz; break;
1439 case 3: src_neg = pSource->negw; break;
1440 default: src_neg = 0; break;
1441 }
1442
1443 switch (source_index)
1444 {
1445 case 0:
1446 alu_instruction_ptr->m_Word0.f.src0_sel = src_sel;
1447 alu_instruction_ptr->m_Word0.f.src0_rel = src_rel;
1448 alu_instruction_ptr->m_Word0.f.src0_chan = src_chan;
1449 alu_instruction_ptr->m_Word0.f.src0_neg = src_neg;
1450 break;
1451 case 1:
1452 alu_instruction_ptr->m_Word0.f.src1_sel = src_sel;
1453 alu_instruction_ptr->m_Word0.f.src1_rel = src_rel;
1454 alu_instruction_ptr->m_Word0.f.src1_chan = src_chan;
1455 alu_instruction_ptr->m_Word0.f.src1_neg = src_neg;
1456 break;
1457 case 2:
1458 alu_instruction_ptr->m_Word1_OP3.f.src2_sel = src_sel;
1459 alu_instruction_ptr->m_Word1_OP3.f.src2_rel = src_rel;
1460 alu_instruction_ptr->m_Word1_OP3.f.src2_chan = src_chan;
1461 alu_instruction_ptr->m_Word1_OP3.f.src2_neg = src_neg;
1462 break;
1463 default:
1464 radeon_error("Only three sources allowed in ALU opcodes.\n");
1465 return GL_FALSE;
1466 break;
1467 }
1468
1469 return GL_TRUE;
1470 }
1471
1472 GLboolean add_alu_instruction(r700_AssemblerBase* pAsm,
1473 R700ALUInstruction* alu_instruction_ptr,
1474 GLuint contiguous_slots_needed)
1475 {
1476 if( GL_FALSE == check_current_clause(pAsm, CF_ALU_CLAUSE) )
1477 {
1478 return GL_FALSE;
1479 }
1480
1481 if ( pAsm->cf_current_alu_clause_ptr == NULL ||
1482 ( (pAsm->cf_current_alu_clause_ptr != NULL) &&
1483 (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-contiguous_slots_needed-1) )
1484 ) )
1485 {
1486
1487 //new cf inst for this clause
1488 pAsm->cf_current_alu_clause_ptr = (R700ControlFlowALUClause*) CALLOC_STRUCT(R700ControlFlowALUClause);
1489
1490 // link the new cf to cf segment
1491 if(NULL != pAsm->cf_current_alu_clause_ptr)
1492 {
1493 Init_R700ControlFlowALUClause(pAsm->cf_current_alu_clause_ptr);
1494 AddCFInstruction( pAsm->pR700Shader,
1495 (R700ControlFlowInstruction *)pAsm->cf_current_alu_clause_ptr );
1496 }
1497 else
1498 {
1499 radeon_error("Could not allocate a new ALU CF instruction.\n");
1500 return GL_FALSE;
1501 }
1502
1503 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank0 = 0x0;
1504 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_bank1 = 0x0;
1505 pAsm->cf_current_alu_clause_ptr->m_Word0.f.kcache_mode0 = SQ_CF_KCACHE_NOP;
1506
1507 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_mode1 = SQ_CF_KCACHE_NOP;
1508 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr0 = 0x0;
1509 pAsm->cf_current_alu_clause_ptr->m_Word1.f.kcache_addr1 = 0x0;
1510
1511 //cf_current_alu_clause_ptr->m_Word1.f.count = number_of_scalar_operations - 1;
1512 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count = 0x0;
1513 pAsm->cf_current_alu_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_ALU;
1514
1515 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
1516
1517 pAsm->cf_current_alu_clause_ptr->m_Word1.f.barrier = 0x1;
1518 }
1519 else
1520 {
1521 pAsm->cf_current_alu_clause_ptr->m_Word1.f.count++;
1522 }
1523
1524 // If this clause constains any instruction that is forward dependent on a TEX instruction,
1525 // set the whole_quad_mode for this clause
1526 if ( pAsm->pInstDeps[pAsm->uiCurInst].nDstDep > (-1) )
1527 {
1528 pAsm->cf_current_alu_clause_ptr->m_Word1.f.whole_quad_mode = 0x1;
1529 }
1530
1531 if (pAsm->cf_current_alu_clause_ptr->m_Word1.f.count >= (GetCFMaxInstructions(pAsm->cf_current_alu_clause_ptr->m_ShaderInstType)-1) )
1532 {
1533 alu_instruction_ptr->m_Word0.f.last = 1;
1534 }
1535
1536 if(NULL == pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction)
1537 {
1538 pAsm->cf_current_alu_clause_ptr->m_pLinkedALUInstruction = alu_instruction_ptr;
1539 alu_instruction_ptr->m_pLinkedALUClause = pAsm->cf_current_alu_clause_ptr;
1540 }
1541
1542 AddALUInstruction(pAsm->pR700Shader, alu_instruction_ptr);
1543
1544 return GL_TRUE;
1545 }
1546
1547 void get_src_properties(R700ALUInstruction* alu_instruction_ptr,
1548 int source_index,
1549 BITS* psrc_sel,
1550 BITS* psrc_rel,
1551 BITS* psrc_chan,
1552 BITS* psrc_neg)
1553 {
1554 switch (source_index)
1555 {
1556 case 0:
1557 *psrc_sel = alu_instruction_ptr->m_Word0.f.src0_sel ;
1558 *psrc_rel = alu_instruction_ptr->m_Word0.f.src0_rel ;
1559 *psrc_chan = alu_instruction_ptr->m_Word0.f.src0_chan;
1560 *psrc_neg = alu_instruction_ptr->m_Word0.f.src0_neg ;
1561 break;
1562
1563 case 1:
1564 *psrc_sel = alu_instruction_ptr->m_Word0.f.src1_sel ;
1565 *psrc_rel = alu_instruction_ptr->m_Word0.f.src1_rel ;
1566 *psrc_chan = alu_instruction_ptr->m_Word0.f.src1_chan;
1567 *psrc_neg = alu_instruction_ptr->m_Word0.f.src1_neg ;
1568 break;
1569
1570 case 2:
1571 *psrc_sel = alu_instruction_ptr->m_Word1_OP3.f.src2_sel;
1572 *psrc_rel = alu_instruction_ptr->m_Word1_OP3.f.src2_rel;
1573 *psrc_chan = alu_instruction_ptr->m_Word1_OP3.f.src2_chan;
1574 *psrc_neg = alu_instruction_ptr->m_Word1_OP3.f.src2_neg;
1575 break;
1576 }
1577 }
1578
1579 int is_cfile(BITS sel)
1580 {
1581 if (sel > 255 && sel < 512)
1582 {
1583 return 1;
1584 }
1585 return 0;
1586 }
1587
1588 int is_const(BITS sel)
1589 {
1590 if (is_cfile(sel))
1591 {
1592 return 1;
1593 }
1594 else if(sel >= SQ_ALU_SRC_0 && sel <= SQ_ALU_SRC_LITERAL)
1595 {
1596 return 1;
1597 }
1598 return 0;
1599 }
1600
1601 int is_gpr(BITS sel)
1602 {
1603 if (sel >= 0 && sel < 128)
1604 {
1605 return 1;
1606 }
1607 return 0;
1608 }
1609
1610 const GLuint BANK_SWIZZLE_VEC[8] = {SQ_ALU_VEC_210, //000
1611 SQ_ALU_VEC_120, //001
1612 SQ_ALU_VEC_102, //010
1613
1614 SQ_ALU_VEC_201, //011
1615 SQ_ALU_VEC_012, //100
1616 SQ_ALU_VEC_021, //101
1617
1618 SQ_ALU_VEC_012, //110
1619 SQ_ALU_VEC_012}; //111
1620
1621 const GLuint BANK_SWIZZLE_SCL[8] = {SQ_ALU_SCL_210, //000
1622 SQ_ALU_SCL_122, //001
1623 SQ_ALU_SCL_122, //010
1624
1625 SQ_ALU_SCL_221, //011
1626 SQ_ALU_SCL_212, //100
1627 SQ_ALU_SCL_122, //101
1628
1629 SQ_ALU_SCL_122, //110
1630 SQ_ALU_SCL_122}; //111
1631
1632 GLboolean reserve_cfile(r700_AssemblerBase* pAsm,
1633 GLuint sel,
1634 GLuint chan)
1635 {
1636 int res_match = (-1);
1637 int res_empty = (-1);
1638
1639 GLint res;
1640
1641 for (res=3; res>=0; res--)
1642 {
1643 if(pAsm->hw_cfile_addr[ res] < 0)
1644 {
1645 res_empty = res;
1646 }
1647 else if( (pAsm->hw_cfile_addr[res] == (int)sel)
1648 &&
1649 (pAsm->hw_cfile_chan[ res ] == (int) chan) )
1650 {
1651 res_match = res;
1652 }
1653 }
1654
1655 if(res_match >= 0)
1656 {
1657 // Read for this scalar component already reserved, nothing to do here.
1658 ;
1659 }
1660 else if(res_empty >= 0)
1661 {
1662 pAsm->hw_cfile_addr[ res_empty ] = sel;
1663 pAsm->hw_cfile_chan[ res_empty ] = chan;
1664 }
1665 else
1666 {
1667 radeon_error("All cfile read ports are used, cannot reference C$sel, channel $chan.\n");
1668 return GL_FALSE;
1669 }
1670 return GL_TRUE;
1671 }
1672
1673 GLboolean reserve_gpr(r700_AssemblerBase* pAsm, GLuint sel, GLuint chan, GLuint cycle)
1674 {
1675 if(pAsm->hw_gpr[cycle][chan] < 0)
1676 {
1677 pAsm->hw_gpr[cycle][chan] = sel;
1678 }
1679 else if(pAsm->hw_gpr[cycle][chan] != (int)sel)
1680 {
1681 radeon_error("Another scalar operation has already used GPR read port for given channel\n");
1682 return GL_FALSE;
1683 }
1684
1685 return GL_TRUE;
1686 }
1687
1688 GLboolean cycle_for_scalar_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1689 {
1690 switch (swiz)
1691 {
1692 case SQ_ALU_SCL_210:
1693 {
1694 int table[3] = {2, 1, 0};
1695 *pCycle = table[sel];
1696 return GL_TRUE;
1697 }
1698 break;
1699 case SQ_ALU_SCL_122:
1700 {
1701 int table[3] = {1, 2, 2};
1702 *pCycle = table[sel];
1703 return GL_TRUE;
1704 }
1705 break;
1706 case SQ_ALU_SCL_212:
1707 {
1708 int table[3] = {2, 1, 2};
1709 *pCycle = table[sel];
1710 return GL_TRUE;
1711 }
1712 break;
1713 case SQ_ALU_SCL_221:
1714 {
1715 int table[3] = {2, 2, 1};
1716 *pCycle = table[sel];
1717 return GL_TRUE;
1718 }
1719 break;
1720 default:
1721 radeon_error("Bad Scalar bank swizzle value\n");
1722 break;
1723 }
1724
1725 return GL_FALSE;
1726 }
1727
1728 GLboolean cycle_for_vector_bank_swizzle(const int swiz, const int sel, GLuint* pCycle)
1729 {
1730 switch (swiz)
1731 {
1732 case SQ_ALU_VEC_012:
1733 {
1734 int table[3] = {0, 1, 2};
1735 *pCycle = table[sel];
1736 }
1737 break;
1738 case SQ_ALU_VEC_021:
1739 {
1740 int table[3] = {0, 2, 1};
1741 *pCycle = table[sel];
1742 }
1743 break;
1744 case SQ_ALU_VEC_120:
1745 {
1746 int table[3] = {1, 2, 0};
1747 *pCycle = table[sel];
1748 }
1749 break;
1750 case SQ_ALU_VEC_102:
1751 {
1752 int table[3] = {1, 0, 2};
1753 *pCycle = table[sel];
1754 }
1755 break;
1756 case SQ_ALU_VEC_201:
1757 {
1758 int table[3] = {2, 0, 1};
1759 *pCycle = table[sel];
1760 }
1761 break;
1762 case SQ_ALU_VEC_210:
1763 {
1764 int table[3] = {2, 1, 0};
1765 *pCycle = table[sel];
1766 }
1767 break;
1768 default:
1769 radeon_error("Bad Vec bank swizzle value\n");
1770 return GL_FALSE;
1771 break;
1772 }
1773
1774 return GL_TRUE;
1775 }
1776
1777 GLboolean check_scalar(r700_AssemblerBase* pAsm,
1778 R700ALUInstruction* alu_instruction_ptr)
1779 {
1780 GLuint cycle;
1781 GLuint bank_swizzle;
1782 GLuint const_count = 0;
1783
1784 BITS sel;
1785 BITS chan;
1786 BITS rel;
1787 BITS neg;
1788
1789 GLuint src;
1790
1791 BITS src_sel [3] = {0,0,0};
1792 BITS src_chan[3] = {0,0,0};
1793 BITS src_rel [3] = {0,0,0};
1794 BITS src_neg [3] = {0,0,0};
1795
1796 GLuint swizzle_key;
1797
1798 GLuint number_of_operands = r700GetNumOperands(pAsm);
1799
1800 for (src=0; src<number_of_operands; src++)
1801 {
1802 get_src_properties(alu_instruction_ptr,
1803 src,
1804 &(src_sel[src]),
1805 &(src_rel[src]),
1806 &(src_chan[src]),
1807 &(src_neg[src]) );
1808 }
1809
1810
1811 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1812 (is_const( src_sel[1] ) ? 2 : 0) +
1813 (is_const( src_sel[2] ) ? 1 : 0) );
1814
1815 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_SCL[ swizzle_key ];
1816
1817 for (src=0; src<number_of_operands; src++)
1818 {
1819 sel = src_sel [src];
1820 chan = src_chan[src];
1821 rel = src_rel [src];
1822 neg = src_neg [src];
1823
1824 if (is_const( sel ))
1825 {
1826 // Any constant, including literal and inline constants
1827 const_count++;
1828
1829 if (is_cfile( sel ))
1830 {
1831 reserve_cfile(pAsm, sel, chan);
1832 }
1833
1834 }
1835 }
1836
1837 for (src=0; src<number_of_operands; src++)
1838 {
1839 sel = src_sel [src];
1840 chan = src_chan[src];
1841 rel = src_rel [src];
1842 neg = src_neg [src];
1843
1844 if( is_gpr(sel) )
1845 {
1846 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1847
1848 if( GL_FALSE == cycle_for_scalar_bank_swizzle(bank_swizzle, src, &cycle) )
1849 {
1850 return GL_FALSE;
1851 }
1852
1853 if(cycle < const_count)
1854 {
1855 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1856 {
1857 return GL_FALSE;
1858 }
1859 }
1860 }
1861 }
1862
1863 return GL_TRUE;
1864 }
1865
1866 GLboolean check_vector(r700_AssemblerBase* pAsm,
1867 R700ALUInstruction* alu_instruction_ptr)
1868 {
1869 GLuint cycle;
1870 GLuint bank_swizzle;
1871 GLuint const_count = 0;
1872
1873 GLuint src;
1874
1875 BITS sel;
1876 BITS chan;
1877 BITS rel;
1878 BITS neg;
1879
1880 BITS src_sel [3] = {0,0,0};
1881 BITS src_chan[3] = {0,0,0};
1882 BITS src_rel [3] = {0,0,0};
1883 BITS src_neg [3] = {0,0,0};
1884
1885 GLuint swizzle_key;
1886
1887 GLuint number_of_operands = r700GetNumOperands(pAsm);
1888
1889 for (src=0; src<number_of_operands; src++)
1890 {
1891 get_src_properties(alu_instruction_ptr,
1892 src,
1893 &(src_sel[src]),
1894 &(src_rel[src]),
1895 &(src_chan[src]),
1896 &(src_neg[src]) );
1897 }
1898
1899
1900 swizzle_key = ( (is_const( src_sel[0] ) ? 4 : 0) +
1901 (is_const( src_sel[1] ) ? 2 : 0) +
1902 (is_const( src_sel[2] ) ? 1 : 0)
1903 );
1904
1905 alu_instruction_ptr->m_Word1.f.bank_swizzle = BANK_SWIZZLE_VEC[swizzle_key];
1906
1907 for (src=0; src<number_of_operands; src++)
1908 {
1909 sel = src_sel [src];
1910 chan = src_chan[src];
1911 rel = src_rel [src];
1912 neg = src_neg [src];
1913
1914
1915 bank_swizzle = alu_instruction_ptr->m_Word1.f.bank_swizzle;
1916
1917 if( is_gpr(sel) )
1918 {
1919 if( GL_FALSE == cycle_for_vector_bank_swizzle(bank_swizzle, src, &cycle) )
1920 {
1921 return GL_FALSE;
1922 }
1923
1924 if ( (src == 1) &&
1925 (sel == src_sel[0]) &&
1926 (chan == src_chan[0]) )
1927 {
1928 }
1929 else
1930 {
1931 if( GL_FALSE == reserve_gpr(pAsm, sel, chan, cycle) )
1932 {
1933 return GL_FALSE;
1934 }
1935 }
1936 }
1937 else if( is_const(sel) )
1938 {
1939 const_count++;
1940
1941 if( is_cfile(sel) )
1942 {
1943 if( GL_FALSE == reserve_cfile(pAsm, sel, chan) )
1944 {
1945 return GL_FALSE;
1946 }
1947 }
1948 }
1949 }
1950
1951 return GL_TRUE;
1952 }
1953
1954 GLboolean assemble_alu_instruction(r700_AssemblerBase *pAsm)
1955 {
1956 GLuint number_of_scalar_operations;
1957 GLboolean is_single_scalar_operation;
1958 GLuint scalar_channel_index;
1959
1960 PVSSRC * pcurrent_source;
1961 int current_source_index;
1962 GLuint contiguous_slots_needed;
1963
1964 GLuint uNumSrc = r700GetNumOperands(pAsm);
1965 GLuint channel_swizzle, j;
1966 GLuint chan_counter[4] = {0, 0, 0, 0};
1967 PVSSRC * pSource[3];
1968 GLboolean bSplitInst = GL_FALSE;
1969
1970 if (1 == pAsm->D.dst.math)
1971 {
1972 is_single_scalar_operation = GL_TRUE;
1973 number_of_scalar_operations = 1;
1974 }
1975 else
1976 {
1977 is_single_scalar_operation = GL_FALSE;
1978 number_of_scalar_operations = 4;
1979
1980 /* current assembler doesn't do more than 1 register per source */
1981 #if 0
1982 /* check read port, only very preliminary algorithm, not count in
1983 src0/1 same comp case and prev slot repeat case; also not count relative
1984 addressing. TODO: improve performance. */
1985 for(j=0; j<uNumSrc; j++)
1986 {
1987 pSource[j] = &(pAsm->S[j].src);
1988 }
1989 for(scalar_channel_index=0; scalar_channel_index<4; scalar_channel_index++)
1990 {
1991 for(j=0; j<uNumSrc; j++)
1992 {
1993 switch (scalar_channel_index)
1994 {
1995 case 0: channel_swizzle = pSource[j]->swizzlex; break;
1996 case 1: channel_swizzle = pSource[j]->swizzley; break;
1997 case 2: channel_swizzle = pSource[j]->swizzlez; break;
1998 case 3: channel_swizzle = pSource[j]->swizzlew; break;
1999 default: channel_swizzle = SQ_SEL_MASK; break;
2000 }
2001 if ( ((pSource[j]->rtype == SRC_REG_TEMPORARY) ||
2002 (pSource[j]->rtype == SRC_REG_INPUT))
2003 && (channel_swizzle <= SQ_SEL_W) )
2004 {
2005 chan_counter[channel_swizzle]++;
2006 }
2007 }
2008 }
2009 if( (chan_counter[SQ_SEL_X] > 3)
2010 || (chan_counter[SQ_SEL_Y] > 3)
2011 || (chan_counter[SQ_SEL_Z] > 3)
2012 || (chan_counter[SQ_SEL_W] > 3) ) /* each chan bank has only 3 ports. */
2013 {
2014 bSplitInst = GL_TRUE;
2015 }
2016 #endif
2017 }
2018
2019 contiguous_slots_needed = 0;
2020
2021 if(GL_TRUE == is_reduction_opcode(&(pAsm->D)) )
2022 {
2023 contiguous_slots_needed = 4;
2024 }
2025
2026 initialize(pAsm);
2027
2028 for (scalar_channel_index=0;
2029 scalar_channel_index < number_of_scalar_operations;
2030 scalar_channel_index++)
2031 {
2032 R700ALUInstruction* alu_instruction_ptr = (R700ALUInstruction*) CALLOC_STRUCT(R700ALUInstruction);
2033 if (alu_instruction_ptr == NULL)
2034 {
2035 return GL_FALSE;
2036 }
2037 Init_R700ALUInstruction(alu_instruction_ptr);
2038
2039 //src 0
2040 current_source_index = 0;
2041 pcurrent_source = &(pAsm->S[0].src);
2042
2043 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2044 current_source_index,
2045 pcurrent_source,
2046 scalar_channel_index) )
2047 {
2048 return GL_FALSE;
2049 }
2050
2051 if (uNumSrc > 1)
2052 {
2053 // Process source 1
2054 current_source_index = 1;
2055 pcurrent_source = &(pAsm->S[current_source_index].src);
2056
2057 if (GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2058 current_source_index,
2059 pcurrent_source,
2060 scalar_channel_index) )
2061 {
2062 return GL_FALSE;
2063 }
2064 }
2065
2066 //other bits
2067 alu_instruction_ptr->m_Word0.f.index_mode = SQ_INDEX_LOOP;
2068
2069 if( (is_single_scalar_operation == GL_TRUE)
2070 || (GL_TRUE == bSplitInst) )
2071 {
2072 alu_instruction_ptr->m_Word0.f.last = 1;
2073 }
2074 else
2075 {
2076 alu_instruction_ptr->m_Word0.f.last = (scalar_channel_index == 3) ? 1 : 0;
2077 }
2078
2079 alu_instruction_ptr->m_Word0.f.pred_sel = 0x0;
2080 alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2081 alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2082
2083 // dst
2084 if( (pAsm->D.dst.rtype == DST_REG_TEMPORARY) ||
2085 (pAsm->D.dst.rtype == DST_REG_OUT) )
2086 {
2087 alu_instruction_ptr->m_Word1.f.dst_gpr = pAsm->D.dst.reg;
2088 }
2089 else
2090 {
2091 radeon_error("Only temp destination registers supported for ALU dest regs.\n");
2092 return GL_FALSE;
2093 }
2094
2095 alu_instruction_ptr->m_Word1.f.dst_rel = SQ_ABSOLUTE; //D.rtype
2096
2097 if ( is_single_scalar_operation == GL_TRUE )
2098 {
2099 // Override scalar_channel_index since only one scalar value will be written
2100 if(pAsm->D.dst.writex)
2101 {
2102 scalar_channel_index = 0;
2103 }
2104 else if(pAsm->D.dst.writey)
2105 {
2106 scalar_channel_index = 1;
2107 }
2108 else if(pAsm->D.dst.writez)
2109 {
2110 scalar_channel_index = 2;
2111 }
2112 else if(pAsm->D.dst.writew)
2113 {
2114 scalar_channel_index = 3;
2115 }
2116 }
2117
2118 alu_instruction_ptr->m_Word1.f.dst_chan = scalar_channel_index;
2119
2120 alu_instruction_ptr->m_Word1.f.clamp = pAsm->pILInst[pAsm->uiCurInst].SaturateMode;
2121
2122 if (pAsm->D.dst.op3)
2123 {
2124 //op3
2125
2126 alu_instruction_ptr->m_Word1_OP3.f.alu_inst = pAsm->D.dst.opcode;
2127
2128 //There's 3rd src for op3
2129 current_source_index = 2;
2130 pcurrent_source = &(pAsm->S[current_source_index].src);
2131
2132 if ( GL_FALSE == assemble_alu_src(alu_instruction_ptr,
2133 current_source_index,
2134 pcurrent_source,
2135 scalar_channel_index) )
2136 {
2137 return GL_FALSE;
2138 }
2139 }
2140 else
2141 {
2142 //op2
2143 if (pAsm->bR6xx)
2144 {
2145 alu_instruction_ptr->m_Word1_OP2.f6.alu_inst = pAsm->D.dst.opcode;
2146
2147 alu_instruction_ptr->m_Word1_OP2.f6.src0_abs = 0x0;
2148 alu_instruction_ptr->m_Word1_OP2.f6.src1_abs = 0x0;
2149
2150 //alu_instruction_ptr->m_Word1_OP2.f6.update_execute_mask = 0x0;
2151 //alu_instruction_ptr->m_Word1_OP2.f6.update_pred = 0x0;
2152 switch (scalar_channel_index)
2153 {
2154 case 0:
2155 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writex;
2156 break;
2157 case 1:
2158 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writey;
2159 break;
2160 case 2:
2161 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writez;
2162 break;
2163 case 3:
2164 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = pAsm->D.dst.writew;
2165 break;
2166 default:
2167 alu_instruction_ptr->m_Word1_OP2.f6.write_mask = 1; //SQ_SEL_MASK;
2168 break;
2169 }
2170 alu_instruction_ptr->m_Word1_OP2.f6.omod = SQ_ALU_OMOD_OFF;
2171 }
2172 else
2173 {
2174 alu_instruction_ptr->m_Word1_OP2.f.alu_inst = pAsm->D.dst.opcode;
2175
2176 alu_instruction_ptr->m_Word1_OP2.f.src0_abs = 0x0;
2177 alu_instruction_ptr->m_Word1_OP2.f.src1_abs = 0x0;
2178
2179 //alu_instruction_ptr->m_Word1_OP2.f.update_execute_mask = 0x0;
2180 //alu_instruction_ptr->m_Word1_OP2.f.update_pred = 0x0;
2181 switch (scalar_channel_index)
2182 {
2183 case 0:
2184 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writex;
2185 break;
2186 case 1:
2187 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writey;
2188 break;
2189 case 2:
2190 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writez;
2191 break;
2192 case 3:
2193 alu_instruction_ptr->m_Word1_OP2.f.write_mask = pAsm->D.dst.writew;
2194 break;
2195 default:
2196 alu_instruction_ptr->m_Word1_OP2.f.write_mask = 1; //SQ_SEL_MASK;
2197 break;
2198 }
2199 alu_instruction_ptr->m_Word1_OP2.f.omod = SQ_ALU_OMOD_OFF;
2200 }
2201 }
2202
2203 if(GL_FALSE == add_alu_instruction(pAsm, alu_instruction_ptr, contiguous_slots_needed) )
2204 {
2205 return GL_FALSE;
2206 }
2207
2208 /*
2209 * Judge the type of current instruction, is it vector or scalar
2210 * instruction.
2211 */
2212 if (is_single_scalar_operation)
2213 {
2214 if(GL_FALSE == check_scalar(pAsm, alu_instruction_ptr) )
2215 {
2216 return GL_FALSE;
2217 }
2218 }
2219 else
2220 {
2221 if(GL_FALSE == check_vector(pAsm, alu_instruction_ptr) )
2222 {
2223 return 1;
2224 }
2225 }
2226
2227 contiguous_slots_needed = 0;
2228 }
2229
2230 return GL_TRUE;
2231 }
2232
2233 GLboolean next_ins(r700_AssemblerBase *pAsm)
2234 {
2235 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2236
2237 if( GL_TRUE == pAsm->is_tex )
2238 {
2239 if (pILInst->TexSrcTarget == TEXTURE_RECT_INDEX) {
2240 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_FALSE) )
2241 {
2242 radeon_error("Error assembling TEX instruction\n");
2243 return GL_FALSE;
2244 }
2245 } else {
2246 if( GL_FALSE == assemble_tex_instruction(pAsm, GL_TRUE) )
2247 {
2248 radeon_error("Error assembling TEX instruction\n");
2249 return GL_FALSE;
2250 }
2251 }
2252 }
2253 else
2254 { //ALU
2255 if( GL_FALSE == assemble_alu_instruction(pAsm) )
2256 {
2257 radeon_error("Error assembling ALU instruction\n");
2258 return GL_FALSE;
2259 }
2260 }
2261
2262 if(pAsm->D.dst.rtype == DST_REG_OUT)
2263 {
2264 if(pAsm->D.dst.op3)
2265 {
2266 // There is no mask for OP3 instructions, so all channels are written
2267 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number] = 0xF;
2268 }
2269 else
2270 {
2271 pAsm->pucOutMask[pAsm->D.dst.reg - pAsm->starting_export_register_number]
2272 |= (unsigned char)pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask;
2273 }
2274 }
2275
2276 //reset for next inst.
2277 pAsm->D.bits = 0;
2278 pAsm->S[0].bits = 0;
2279 pAsm->S[1].bits = 0;
2280 pAsm->S[2].bits = 0;
2281 pAsm->is_tex = GL_FALSE;
2282 pAsm->need_tex_barrier = GL_FALSE;
2283 return GL_TRUE;
2284 }
2285
2286 GLboolean assemble_math_function(r700_AssemblerBase* pAsm, BITS opcode)
2287 {
2288 BITS tmp;
2289
2290 checkop1(pAsm);
2291
2292 tmp = gethelpr(pAsm);
2293
2294 // opcode tmp.x, a.x
2295 // MOV dst, tmp.x
2296
2297 pAsm->D.dst.opcode = opcode;
2298 pAsm->D.dst.math = 1;
2299
2300 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2301 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2302 pAsm->D.dst.reg = tmp;
2303 pAsm->D.dst.writex = 1;
2304
2305 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2306 {
2307 return GL_FALSE;
2308 }
2309
2310 if ( GL_FALSE == next_ins(pAsm) )
2311 {
2312 return GL_FALSE;
2313 }
2314
2315 // Now replicate result to all necessary channels in destination
2316 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2317
2318 if( GL_FALSE == assemble_dst(pAsm) )
2319 {
2320 return GL_FALSE;
2321 }
2322
2323 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2324 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
2325 pAsm->S[0].src.reg = tmp;
2326
2327 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
2328 noneg_PVSSRC(&(pAsm->S[0].src));
2329
2330 if( GL_FALSE == next_ins(pAsm) )
2331 {
2332 return GL_FALSE;
2333 }
2334
2335 return GL_TRUE;
2336 }
2337
2338 GLboolean assemble_ABS(r700_AssemblerBase *pAsm)
2339 {
2340 checkop1(pAsm);
2341
2342 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2343
2344 if( GL_FALSE == assemble_dst(pAsm) )
2345 {
2346 return GL_FALSE;
2347 }
2348 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2349 {
2350 return GL_FALSE;
2351 }
2352
2353 pAsm->S[1].bits = pAsm->S[0].bits;
2354 flipneg_PVSSRC(&(pAsm->S[1].src));
2355
2356 if ( GL_FALSE == next_ins(pAsm) )
2357 {
2358 return GL_FALSE;
2359 }
2360
2361 return GL_TRUE;
2362 }
2363
2364 GLboolean assemble_ADD(r700_AssemblerBase *pAsm)
2365 {
2366 if( GL_FALSE == checkop2(pAsm) )
2367 {
2368 return GL_FALSE;
2369 }
2370
2371 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2372
2373 if( GL_FALSE == assemble_dst(pAsm) )
2374 {
2375 return GL_FALSE;
2376 }
2377
2378 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2379 {
2380 return GL_FALSE;
2381 }
2382
2383 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2384 {
2385 return GL_FALSE;
2386 }
2387
2388 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_SUB)
2389 {
2390 flipneg_PVSSRC(&(pAsm->S[1].src));
2391 }
2392
2393 if( GL_FALSE == next_ins(pAsm) )
2394 {
2395 return GL_FALSE;
2396 }
2397
2398 return GL_TRUE;
2399 }
2400
2401 GLboolean assemble_BAD(char *opcode_str)
2402 {
2403 radeon_error("Not yet implemented instruction (%s)\n", opcode_str);
2404 return GL_FALSE;
2405 }
2406
2407 GLboolean assemble_CMP(r700_AssemblerBase *pAsm)
2408 {
2409 int tmp;
2410
2411 if( GL_FALSE == checkop3(pAsm) )
2412 {
2413 return GL_FALSE;
2414 }
2415
2416 pAsm->D.dst.opcode = SQ_OP3_INST_CNDGE;
2417 pAsm->D.dst.op3 = 1;
2418
2419 tmp = (-1);
2420
2421 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2422 {
2423 //OP3 has no support for write mask
2424 tmp = gethelpr(pAsm);
2425
2426 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2427 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2428 pAsm->D.dst.reg = tmp;
2429
2430 nomask_PVSDST(&(pAsm->D.dst));
2431 }
2432 else
2433 {
2434 if( GL_FALSE == assemble_dst(pAsm) )
2435 {
2436 return GL_FALSE;
2437 }
2438 }
2439
2440 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2441 {
2442 return GL_FALSE;
2443 }
2444
2445 if( GL_FALSE == assemble_src(pAsm, 2, 1) )
2446 {
2447 return GL_FALSE;
2448 }
2449
2450 if( GL_FALSE == assemble_src(pAsm, 1, 2) )
2451 {
2452 return GL_FALSE;
2453 }
2454
2455 if ( GL_FALSE == next_ins(pAsm) )
2456 {
2457 return GL_FALSE;
2458 }
2459
2460 if (0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
2461 {
2462 if( GL_FALSE == assemble_dst(pAsm) )
2463 {
2464 return GL_FALSE;
2465 }
2466
2467 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2468
2469 //tmp for source
2470 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2471 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2472 pAsm->S[0].src.reg = tmp;
2473
2474 noneg_PVSSRC(&(pAsm->S[0].src));
2475 noswizzle_PVSSRC(&(pAsm->S[0].src));
2476
2477 if( GL_FALSE == next_ins(pAsm) )
2478 {
2479 return GL_FALSE;
2480 }
2481 }
2482
2483 return GL_TRUE;
2484 }
2485
2486 GLboolean assemble_COS(r700_AssemblerBase *pAsm)
2487 {
2488 return assemble_math_function(pAsm, SQ_OP2_INST_COS);
2489 }
2490
2491 GLboolean assemble_DOT(r700_AssemblerBase *pAsm)
2492 {
2493 if( GL_FALSE == checkop2(pAsm) )
2494 {
2495 return GL_FALSE;
2496 }
2497
2498 pAsm->D.dst.opcode = SQ_OP2_INST_DOT4;
2499
2500 if( GL_FALSE == assemble_dst(pAsm) )
2501 {
2502 return GL_FALSE;
2503 }
2504
2505 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2506 {
2507 return GL_FALSE;
2508 }
2509
2510 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2511 {
2512 return GL_FALSE;
2513 }
2514
2515 if(OPCODE_DP3 == pAsm->pILInst[pAsm->uiCurInst].Opcode)
2516 {
2517 zerocomp_PVSSRC(&(pAsm->S[0].src), 3);
2518 zerocomp_PVSSRC(&(pAsm->S[1].src), 3);
2519 }
2520 else if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_DPH)
2521 {
2522 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2523 }
2524
2525 if ( GL_FALSE == next_ins(pAsm) )
2526 {
2527 return GL_FALSE;
2528 }
2529
2530 return GL_TRUE;
2531 }
2532
2533 GLboolean assemble_DST(r700_AssemblerBase *pAsm)
2534 {
2535 if( GL_FALSE == checkop2(pAsm) )
2536 {
2537 return GL_FALSE;
2538 }
2539
2540 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
2541
2542 if( GL_FALSE == assemble_dst(pAsm) )
2543 {
2544 return GL_FALSE;
2545 }
2546
2547 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2548 {
2549 return GL_FALSE;
2550 }
2551
2552 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2553 {
2554 return GL_FALSE;
2555 }
2556
2557 onecomp_PVSSRC(&(pAsm->S[0].src), 0);
2558 onecomp_PVSSRC(&(pAsm->S[0].src), 3);
2559
2560 onecomp_PVSSRC(&(pAsm->S[1].src), 0);
2561 onecomp_PVSSRC(&(pAsm->S[1].src), 2);
2562
2563 if ( GL_FALSE == next_ins(pAsm) )
2564 {
2565 return GL_FALSE;
2566 }
2567
2568 return GL_TRUE;
2569 }
2570
2571 GLboolean assemble_EX2(r700_AssemblerBase *pAsm)
2572 {
2573 return assemble_math_function(pAsm, SQ_OP2_INST_EXP_IEEE);
2574 }
2575
2576 GLboolean assemble_FLR(r700_AssemblerBase *pAsm)
2577 {
2578 checkop1(pAsm);
2579
2580 pAsm->D.dst.opcode = SQ_OP2_INST_FLOOR;
2581
2582 if ( GL_FALSE == assemble_dst(pAsm) )
2583 {
2584 return GL_FALSE;
2585 }
2586
2587 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2588 {
2589 return GL_FALSE;
2590 }
2591
2592 if ( GL_FALSE == next_ins(pAsm) )
2593 {
2594 return GL_FALSE;
2595 }
2596
2597 return GL_TRUE;
2598 }
2599
2600 GLboolean assemble_FLR_INT(r700_AssemblerBase *pAsm)
2601 {
2602 return assemble_math_function(pAsm, SQ_OP2_INST_FLT_TO_INT);
2603 }
2604
2605 GLboolean assemble_FRC(r700_AssemblerBase *pAsm)
2606 {
2607 checkop1(pAsm);
2608
2609 pAsm->D.dst.opcode = SQ_OP2_INST_FRACT;
2610
2611 if ( GL_FALSE == assemble_dst(pAsm) )
2612 {
2613 return GL_FALSE;
2614 }
2615
2616 if ( GL_FALSE == assemble_src(pAsm, 0, -1) )
2617 {
2618 return GL_FALSE;
2619 }
2620
2621 if ( GL_FALSE == next_ins(pAsm) )
2622 {
2623 return GL_FALSE;
2624 }
2625
2626 return GL_TRUE;
2627 }
2628
2629 GLboolean assemble_KIL(r700_AssemblerBase *pAsm)
2630 {
2631 /* TODO: doc says KILL has to be last(end) ALU clause */
2632
2633 checkop1(pAsm);
2634
2635 pAsm->D.dst.opcode = SQ_OP2_INST_KILLGT;
2636
2637 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2638 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2639 pAsm->D.dst.reg = 0;
2640 pAsm->D.dst.writex = 0;
2641 pAsm->D.dst.writey = 0;
2642 pAsm->D.dst.writez = 0;
2643 pAsm->D.dst.writew = 0;
2644
2645 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2646 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2647 pAsm->S[0].src.reg = 0;
2648
2649 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_0);
2650 noneg_PVSSRC(&(pAsm->S[0].src));
2651
2652 if ( GL_FALSE == assemble_src(pAsm, 0, 1) )
2653 {
2654 return GL_FALSE;
2655 }
2656
2657 if ( GL_FALSE == next_ins(pAsm) )
2658 {
2659 return GL_FALSE;
2660 }
2661
2662 pAsm->pR700Shader->killIsUsed = GL_TRUE;
2663
2664 return GL_TRUE;
2665 }
2666
2667 GLboolean assemble_LG2(r700_AssemblerBase *pAsm)
2668 {
2669 return assemble_math_function(pAsm, SQ_OP2_INST_LOG_IEEE);
2670 }
2671
2672 GLboolean assemble_LRP(r700_AssemblerBase *pAsm)
2673 {
2674 BITS tmp;
2675
2676 if( GL_FALSE == checkop3(pAsm) )
2677 {
2678 return GL_FALSE;
2679 }
2680
2681 tmp = gethelpr(pAsm);
2682
2683 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
2684
2685 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2686 pAsm->D.dst.reg = tmp;
2687 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2688 nomask_PVSDST(&(pAsm->D.dst));
2689
2690
2691 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
2692 {
2693 return GL_FALSE;
2694 }
2695
2696 if ( GL_FALSE == assemble_src(pAsm, 2, 1) )
2697 {
2698 return GL_FALSE;
2699 }
2700
2701 neg_PVSSRC(&(pAsm->S[1].src));
2702
2703 if( GL_FALSE == next_ins(pAsm) )
2704 {
2705 return GL_FALSE;
2706 }
2707
2708 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2709 pAsm->D.dst.op3 = 1;
2710
2711 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2712 pAsm->D.dst.reg = tmp;
2713 nomask_PVSDST(&(pAsm->D.dst));
2714 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2715
2716 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2717 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2718 pAsm->S[0].src.reg = tmp;
2719 noswizzle_PVSSRC(&(pAsm->S[0].src));
2720
2721
2722 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
2723 {
2724 return GL_FALSE;
2725 }
2726 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2727 {
2728 return GL_FALSE;
2729 }
2730
2731 if( GL_FALSE == next_ins(pAsm) )
2732 {
2733 return GL_FALSE;
2734 }
2735
2736 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2737
2738 if( GL_FALSE == assemble_dst(pAsm) )
2739 {
2740 return GL_FALSE;
2741 }
2742
2743 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2744 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2745 pAsm->S[0].src.reg = tmp;
2746 noswizzle_PVSSRC(&(pAsm->S[0].src));
2747
2748 if( GL_FALSE == next_ins(pAsm) )
2749 {
2750 return GL_FALSE;
2751 }
2752
2753 return GL_TRUE;
2754 }
2755
2756 GLboolean assemble_MAD(struct r700_AssemblerBase *pAsm)
2757 {
2758 int tmp, ii;
2759 GLboolean bReplaceDst = GL_FALSE;
2760 struct prog_instruction *pILInst = &(pAsm->pILInst[pAsm->uiCurInst]);
2761
2762 if( GL_FALSE == checkop3(pAsm) )
2763 {
2764 return GL_FALSE;
2765 }
2766
2767 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
2768 pAsm->D.dst.op3 = 1;
2769
2770 tmp = (-1);
2771
2772 if(PROGRAM_TEMPORARY == pILInst->DstReg.File)
2773 { /* TODO : more investigation on MAD src and dst using same register */
2774 for(ii=0; ii<3; ii++)
2775 {
2776 if( (PROGRAM_TEMPORARY == pILInst->SrcReg[ii].File)
2777 && (pILInst->DstReg.Index == pILInst->SrcReg[ii].Index) )
2778 {
2779 bReplaceDst = GL_TRUE;
2780 break;
2781 }
2782 }
2783 }
2784 if(0xF != pILInst->DstReg.WriteMask)
2785 { /* OP3 has no support for write mask */
2786 bReplaceDst = GL_TRUE;
2787 }
2788
2789 if(GL_TRUE == bReplaceDst)
2790 {
2791 tmp = gethelpr(pAsm);
2792
2793 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
2794 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2795 pAsm->D.dst.reg = tmp;
2796
2797 nomask_PVSDST(&(pAsm->D.dst));
2798 }
2799 else
2800 {
2801 if( GL_FALSE == assemble_dst(pAsm) )
2802 {
2803 return GL_FALSE;
2804 }
2805 }
2806
2807 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2808 {
2809 return GL_FALSE;
2810 }
2811
2812 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
2813 {
2814 return GL_FALSE;
2815 }
2816
2817 if( GL_FALSE == assemble_src(pAsm, 2, -1) )
2818 {
2819 return GL_FALSE;
2820 }
2821
2822 if ( GL_FALSE == next_ins(pAsm) )
2823 {
2824 return GL_FALSE;
2825 }
2826
2827 if (GL_TRUE == bReplaceDst)
2828 {
2829 if( GL_FALSE == assemble_dst(pAsm) )
2830 {
2831 return GL_FALSE;
2832 }
2833
2834 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2835
2836 //tmp for source
2837 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2838 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2839 pAsm->S[0].src.reg = tmp;
2840
2841 noneg_PVSSRC(&(pAsm->S[0].src));
2842 noswizzle_PVSSRC(&(pAsm->S[0].src));
2843
2844 if( GL_FALSE == next_ins(pAsm) )
2845 {
2846 return GL_FALSE;
2847 }
2848 }
2849
2850 return GL_TRUE;
2851 }
2852
2853 /* LIT dst, src */
2854 GLboolean assemble_LIT(r700_AssemblerBase *pAsm)
2855 {
2856 unsigned int dstReg;
2857 unsigned int dstType;
2858 unsigned int srcReg;
2859 unsigned int srcType;
2860 checkop1(pAsm);
2861 int tmp = gethelpr(pAsm);
2862
2863 if( GL_FALSE == assemble_dst(pAsm) )
2864 {
2865 return GL_FALSE;
2866 }
2867 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2868 {
2869 return GL_FALSE;
2870 }
2871 dstReg = pAsm->D.dst.reg;
2872 dstType = pAsm->D.dst.rtype;
2873 srcReg = pAsm->S[0].src.reg;
2874 srcType = pAsm->S[0].src.rtype;
2875
2876 /* dst.xw, <- 1.0 */
2877 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
2878 pAsm->D.dst.rtype = dstType;
2879 pAsm->D.dst.reg = dstReg;
2880 pAsm->D.dst.writex = 1;
2881 pAsm->D.dst.writey = 0;
2882 pAsm->D.dst.writez = 0;
2883 pAsm->D.dst.writew = 1;
2884 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
2885 pAsm->S[0].src.reg = tmp;
2886 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2887 noneg_PVSSRC(&(pAsm->S[0].src));
2888 pAsm->S[0].src.swizzlex = SQ_SEL_1;
2889 pAsm->S[0].src.swizzley = SQ_SEL_1;
2890 pAsm->S[0].src.swizzlez = SQ_SEL_1;
2891 pAsm->S[0].src.swizzlew = SQ_SEL_1;
2892 if( GL_FALSE == next_ins(pAsm) )
2893 {
2894 return GL_FALSE;
2895 }
2896
2897 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2898 {
2899 return GL_FALSE;
2900 }
2901
2902 /* dst.y = max(src.x, 0.0) */
2903 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
2904 pAsm->D.dst.rtype = dstType;
2905 pAsm->D.dst.reg = dstReg;
2906 pAsm->D.dst.writex = 0;
2907 pAsm->D.dst.writey = 1;
2908 pAsm->D.dst.writez = 0;
2909 pAsm->D.dst.writew = 0;
2910 pAsm->S[0].src.rtype = srcType;
2911 pAsm->S[0].src.reg = srcReg;
2912 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2913 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
2914 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2915 pAsm->S[1].src.reg = tmp;
2916 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2917 noneg_PVSSRC(&(pAsm->S[1].src));
2918 pAsm->S[1].src.swizzlex = SQ_SEL_0;
2919 pAsm->S[1].src.swizzley = SQ_SEL_0;
2920 pAsm->S[1].src.swizzlez = SQ_SEL_0;
2921 pAsm->S[1].src.swizzlew = SQ_SEL_0;
2922 if( GL_FALSE == next_ins(pAsm) )
2923 {
2924 return GL_FALSE;
2925 }
2926
2927 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2928 {
2929 return GL_FALSE;
2930 }
2931
2932 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y, SQ_SEL_Y);
2933
2934 /* dst.z = log(src.y) */
2935 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_CLAMPED;
2936 pAsm->D.dst.math = 1;
2937 pAsm->D.dst.rtype = dstType;
2938 pAsm->D.dst.reg = dstReg;
2939 pAsm->D.dst.writex = 0;
2940 pAsm->D.dst.writey = 0;
2941 pAsm->D.dst.writez = 1;
2942 pAsm->D.dst.writew = 0;
2943 pAsm->S[0].src.rtype = srcType;
2944 pAsm->S[0].src.reg = srcReg;
2945 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2946 if( GL_FALSE == next_ins(pAsm) )
2947 {
2948 return GL_FALSE;
2949 }
2950
2951 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
2952 {
2953 return GL_FALSE;
2954 }
2955
2956 if( GL_FALSE == assemble_src(pAsm, 0, 2) )
2957 {
2958 return GL_FALSE;
2959 }
2960
2961 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
2962
2963 swizzleagain_PVSSRC(&(pAsm->S[2].src), SQ_SEL_X, SQ_SEL_X, SQ_SEL_X, SQ_SEL_X);
2964
2965 /* tmp.x = amd MUL_LIT(src.w, dst.z, src.x ) */
2966 pAsm->D.dst.opcode = SQ_OP3_INST_MUL_LIT;
2967 pAsm->D.dst.math = 1;
2968 pAsm->D.dst.op3 = 1;
2969 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
2970 pAsm->D.dst.reg = tmp;
2971 pAsm->D.dst.writex = 1;
2972 pAsm->D.dst.writey = 0;
2973 pAsm->D.dst.writez = 0;
2974 pAsm->D.dst.writew = 0;
2975
2976 pAsm->S[0].src.rtype = srcType;
2977 pAsm->S[0].src.reg = srcReg;
2978 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
2979
2980 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
2981 pAsm->S[1].src.reg = dstReg;
2982 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
2983 noneg_PVSSRC(&(pAsm->S[1].src));
2984 pAsm->S[1].src.swizzlex = SQ_SEL_Z;
2985 pAsm->S[1].src.swizzley = SQ_SEL_Z;
2986 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
2987 pAsm->S[1].src.swizzlew = SQ_SEL_Z;
2988
2989 pAsm->S[2].src.rtype = srcType;
2990 pAsm->S[2].src.reg = srcReg;
2991 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
2992
2993 if( GL_FALSE == next_ins(pAsm) )
2994 {
2995 return GL_FALSE;
2996 }
2997
2998 /* dst.z = exp(tmp.x) */
2999 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3000 pAsm->D.dst.math = 1;
3001 pAsm->D.dst.rtype = dstType;
3002 pAsm->D.dst.reg = dstReg;
3003 pAsm->D.dst.writex = 0;
3004 pAsm->D.dst.writey = 0;
3005 pAsm->D.dst.writez = 1;
3006 pAsm->D.dst.writew = 0;
3007
3008 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3009 pAsm->S[0].src.reg = tmp;
3010 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3011 noneg_PVSSRC(&(pAsm->S[0].src));
3012 pAsm->S[0].src.swizzlex = SQ_SEL_X;
3013 pAsm->S[0].src.swizzley = SQ_SEL_X;
3014 pAsm->S[0].src.swizzlez = SQ_SEL_X;
3015 pAsm->S[0].src.swizzlew = SQ_SEL_X;
3016
3017 if( GL_FALSE == next_ins(pAsm) )
3018 {
3019 return GL_FALSE;
3020 }
3021
3022 return GL_TRUE;
3023 }
3024
3025 GLboolean assemble_MAX(r700_AssemblerBase *pAsm)
3026 {
3027 if( GL_FALSE == checkop2(pAsm) )
3028 {
3029 return GL_FALSE;
3030 }
3031
3032 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3033
3034 if( GL_FALSE == assemble_dst(pAsm) )
3035 {
3036 return GL_FALSE;
3037 }
3038
3039 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3040 {
3041 return GL_FALSE;
3042 }
3043
3044 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3045 {
3046 return GL_FALSE;
3047 }
3048
3049 if( GL_FALSE == next_ins(pAsm) )
3050 {
3051 return GL_FALSE;
3052 }
3053
3054 return GL_TRUE;
3055 }
3056
3057 GLboolean assemble_MIN(r700_AssemblerBase *pAsm)
3058 {
3059 if( GL_FALSE == checkop2(pAsm) )
3060 {
3061 return GL_FALSE;
3062 }
3063
3064 pAsm->D.dst.opcode = SQ_OP2_INST_MIN;
3065
3066 if( GL_FALSE == assemble_dst(pAsm) )
3067 {
3068 return GL_FALSE;
3069 }
3070
3071 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3072 {
3073 return GL_FALSE;
3074 }
3075
3076 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3077 {
3078 return GL_FALSE;
3079 }
3080
3081 if( GL_FALSE == next_ins(pAsm) )
3082 {
3083 return GL_FALSE;
3084 }
3085
3086 return GL_TRUE;
3087 }
3088
3089 GLboolean assemble_MOV(r700_AssemblerBase *pAsm)
3090 {
3091 checkop1(pAsm);
3092
3093 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3094
3095 if (GL_FALSE == assemble_dst(pAsm))
3096 {
3097 return GL_FALSE;
3098 }
3099
3100 if (GL_FALSE == assemble_src(pAsm, 0, -1))
3101 {
3102 return GL_FALSE;
3103 }
3104
3105 if ( GL_FALSE == next_ins(pAsm) )
3106 {
3107 return GL_FALSE;
3108 }
3109
3110 return GL_TRUE;
3111 }
3112
3113 GLboolean assemble_MUL(r700_AssemblerBase *pAsm)
3114 {
3115 if( GL_FALSE == checkop2(pAsm) )
3116 {
3117 return GL_FALSE;
3118 }
3119
3120 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3121
3122 if( GL_FALSE == assemble_dst(pAsm) )
3123 {
3124 return GL_FALSE;
3125 }
3126
3127 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3128 {
3129 return GL_FALSE;
3130 }
3131
3132 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3133 {
3134 return GL_FALSE;
3135 }
3136
3137 if( GL_FALSE == next_ins(pAsm) )
3138 {
3139 return GL_FALSE;
3140 }
3141
3142 return GL_TRUE;
3143 }
3144
3145 GLboolean assemble_POW(r700_AssemblerBase *pAsm)
3146 {
3147 BITS tmp;
3148
3149 checkop1(pAsm);
3150
3151 tmp = gethelpr(pAsm);
3152
3153 // LG2 tmp.x, a.swizzle
3154 pAsm->D.dst.opcode = SQ_OP2_INST_LOG_IEEE;
3155 pAsm->D.dst.math = 1;
3156
3157 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3158 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3159 pAsm->D.dst.reg = tmp;
3160 nomask_PVSDST(&(pAsm->D.dst));
3161
3162 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3163 {
3164 return GL_FALSE;
3165 }
3166
3167 if( GL_FALSE == next_ins(pAsm) )
3168 {
3169 return GL_FALSE;
3170 }
3171
3172 // MUL tmp.x, tmp.x, b.swizzle
3173 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3174
3175 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3176 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3177 pAsm->D.dst.reg = tmp;
3178 nomask_PVSDST(&(pAsm->D.dst));
3179
3180 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3181 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3182 pAsm->S[0].src.reg = tmp;
3183 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3184 noneg_PVSSRC(&(pAsm->S[0].src));
3185
3186 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3187 {
3188 return GL_FALSE;
3189 }
3190
3191 if( GL_FALSE == next_ins(pAsm) )
3192 {
3193 return GL_FALSE;
3194 }
3195
3196 // EX2 dst.mask, tmp.x
3197 // EX2 tmp.x, tmp.x
3198 pAsm->D.dst.opcode = SQ_OP2_INST_EXP_IEEE;
3199 pAsm->D.dst.math = 1;
3200
3201 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3202 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3203 pAsm->D.dst.reg = tmp;
3204 nomask_PVSDST(&(pAsm->D.dst));
3205
3206 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3207 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3208 pAsm->S[0].src.reg = tmp;
3209 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3210 noneg_PVSSRC(&(pAsm->S[0].src));
3211
3212 if( GL_FALSE == next_ins(pAsm) )
3213 {
3214 return GL_FALSE;
3215 }
3216
3217 // Now replicate result to all necessary channels in destination
3218 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3219
3220 if( GL_FALSE == assemble_dst(pAsm) )
3221 {
3222 return GL_FALSE;
3223 }
3224
3225 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3226 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3227 pAsm->S[0].src.reg = tmp;
3228
3229 setswizzle_PVSSRC(&(pAsm->S[0].src), SQ_SEL_X);
3230 noneg_PVSSRC(&(pAsm->S[0].src));
3231
3232 if( GL_FALSE == next_ins(pAsm) )
3233 {
3234 return GL_FALSE;
3235 }
3236
3237 return GL_TRUE;
3238 }
3239
3240 GLboolean assemble_RCP(r700_AssemblerBase *pAsm)
3241 {
3242 return assemble_math_function(pAsm, SQ_OP2_INST_RECIP_IEEE);
3243 }
3244
3245 GLboolean assemble_RSQ(r700_AssemblerBase *pAsm)
3246 {
3247 return assemble_math_function(pAsm, SQ_OP2_INST_RECIPSQRT_IEEE);
3248 }
3249
3250 GLboolean assemble_SIN(r700_AssemblerBase *pAsm)
3251 {
3252 return assemble_math_function(pAsm, SQ_OP2_INST_SIN);
3253 }
3254
3255 GLboolean assemble_SCS(r700_AssemblerBase *pAsm)
3256 {
3257 BITS tmp;
3258
3259 checkop1(pAsm);
3260
3261 tmp = gethelpr(pAsm);
3262
3263 // COS tmp.x, a.x
3264 pAsm->D.dst.opcode = SQ_OP2_INST_COS;
3265 pAsm->D.dst.math = 1;
3266
3267 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3268 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3269 pAsm->D.dst.reg = tmp;
3270 pAsm->D.dst.writex = 1;
3271
3272 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3273 {
3274 return GL_FALSE;
3275 }
3276
3277 if ( GL_FALSE == next_ins(pAsm) )
3278 {
3279 return GL_FALSE;
3280 }
3281
3282 // SIN tmp.y, a.x
3283 pAsm->D.dst.opcode = SQ_OP2_INST_SIN;
3284 pAsm->D.dst.math = 1;
3285
3286 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3287 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3288 pAsm->D.dst.reg = tmp;
3289 pAsm->D.dst.writey = 1;
3290
3291 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3292 {
3293 return GL_FALSE;
3294 }
3295
3296 if( GL_FALSE == next_ins(pAsm) )
3297 {
3298 return GL_FALSE;
3299 }
3300
3301 // MOV dst.mask, tmp
3302 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3303
3304 if( GL_FALSE == assemble_dst(pAsm) )
3305 {
3306 return GL_FALSE;
3307 }
3308
3309 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3310 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
3311 pAsm->S[0].src.reg = tmp;
3312
3313 noswizzle_PVSSRC(&(pAsm->S[0].src));
3314 pAsm->S[0].src.swizzlez = SQ_SEL_0;
3315 pAsm->S[0].src.swizzlew = SQ_SEL_0;
3316
3317 if ( GL_FALSE == next_ins(pAsm) )
3318 {
3319 return GL_FALSE;
3320 }
3321
3322 return GL_TRUE;
3323 }
3324
3325 GLboolean assemble_SGE(r700_AssemblerBase *pAsm)
3326 {
3327 if( GL_FALSE == checkop2(pAsm) )
3328 {
3329 return GL_FALSE;
3330 }
3331
3332 pAsm->D.dst.opcode = SQ_OP2_INST_SETGE;
3333
3334 if( GL_FALSE == assemble_dst(pAsm) )
3335 {
3336 return GL_FALSE;
3337 }
3338
3339 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3340 {
3341 return GL_FALSE;
3342 }
3343
3344 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3345 {
3346 return GL_FALSE;
3347 }
3348
3349 if( GL_FALSE == next_ins(pAsm) )
3350 {
3351 return GL_FALSE;
3352 }
3353
3354 return GL_TRUE;
3355 }
3356
3357 GLboolean assemble_SLT(r700_AssemblerBase *pAsm)
3358 {
3359 if( GL_FALSE == checkop2(pAsm) )
3360 {
3361 return GL_FALSE;
3362 }
3363
3364 pAsm->D.dst.opcode = SQ_OP2_INST_SETGT;
3365
3366 if( GL_FALSE == assemble_dst(pAsm) )
3367 {
3368 return GL_FALSE;
3369 }
3370
3371 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3372 {
3373 return GL_FALSE;
3374 }
3375
3376 if( GL_FALSE == assemble_src(pAsm, 1, 0) )
3377 {
3378 return GL_FALSE;
3379 }
3380
3381 if( GL_FALSE == next_ins(pAsm) )
3382 {
3383 return GL_FALSE;
3384 }
3385
3386 return GL_TRUE;
3387 }
3388
3389 GLboolean assemble_STP(r700_AssemblerBase *pAsm)
3390 {
3391 return GL_TRUE;
3392 }
3393
3394 GLboolean assemble_TEX(r700_AssemblerBase *pAsm)
3395 {
3396 GLboolean src_const;
3397 GLboolean need_barrier = GL_FALSE;
3398
3399 checkop1(pAsm);
3400
3401 switch (pAsm->pILInst[pAsm->uiCurInst].SrcReg[0].File)
3402 {
3403 case PROGRAM_CONSTANT:
3404 case PROGRAM_LOCAL_PARAM:
3405 case PROGRAM_ENV_PARAM:
3406 case PROGRAM_STATE_VAR:
3407 src_const = GL_TRUE;
3408 break;
3409 case PROGRAM_TEMPORARY:
3410 case PROGRAM_INPUT:
3411 default:
3412 src_const = GL_FALSE;
3413 break;
3414 }
3415
3416 if (GL_TRUE == src_const)
3417 {
3418 if ( GL_FALSE == mov_temp(pAsm, 0) )
3419 return GL_FALSE;
3420 need_barrier = GL_TRUE;
3421 }
3422
3423 switch (pAsm->pILInst[pAsm->uiCurInst].Opcode)
3424 {
3425 case OPCODE_TEX:
3426 break;
3427 case OPCODE_TXB:
3428 radeon_error("do not support TXB yet\n");
3429 return GL_FALSE;
3430 break;
3431 case OPCODE_TXP:
3432 break;
3433 default:
3434 radeon_error("Internal error: bad texture op (not TEX)\n");
3435 return GL_FALSE;
3436 break;
3437 }
3438
3439 if (pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
3440 {
3441 GLuint tmp = gethelpr(pAsm);
3442 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
3443 pAsm->D.dst.math = 1;
3444 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3445 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3446 pAsm->D.dst.reg = tmp;
3447 pAsm->D.dst.writew = 1;
3448
3449 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3450 {
3451 return GL_FALSE;
3452 }
3453 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_W, SQ_SEL_W, SQ_SEL_W, SQ_SEL_W);
3454 if( GL_FALSE == next_ins(pAsm) )
3455 {
3456 return GL_FALSE;
3457 }
3458
3459 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3460 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3461 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3462 pAsm->D.dst.reg = tmp;
3463 pAsm->D.dst.writex = 1;
3464 pAsm->D.dst.writey = 1;
3465 pAsm->D.dst.writez = 1;
3466 pAsm->D.dst.writew = 0;
3467
3468 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3469 {
3470 return GL_FALSE;
3471 }
3472 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3473 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3474 pAsm->S[1].src.reg = tmp;
3475 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_W);
3476
3477 if( GL_FALSE == next_ins(pAsm) )
3478 {
3479 return GL_FALSE;
3480 }
3481
3482 pAsm->aArgSubst[1] = tmp;
3483 need_barrier = GL_TRUE;
3484 }
3485
3486 if (pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX )
3487 {
3488 GLuint tmp1 = gethelpr(pAsm);
3489 GLuint tmp2 = gethelpr(pAsm);
3490
3491 /* tmp1.xyzw = CUBE(R0.zzxy, R0.yxzz) */
3492 pAsm->D.dst.opcode = SQ_OP2_INST_CUBE;
3493 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3494 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3495 pAsm->D.dst.reg = tmp1;
3496 nomask_PVSDST(&(pAsm->D.dst));
3497
3498 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3499 {
3500 return GL_FALSE;
3501 }
3502
3503 if( GL_FALSE == assemble_src(pAsm, 0, 1) )
3504 {
3505 return GL_FALSE;
3506 }
3507
3508 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y);
3509 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_X, SQ_SEL_Z, SQ_SEL_Z);
3510
3511 if( GL_FALSE == next_ins(pAsm) )
3512 {
3513 return GL_FALSE;
3514 }
3515
3516 /* tmp1.z = ABS(tmp1.z) dont have abs support in assembler currently
3517 * have to do explicit instruction
3518 */
3519 pAsm->D.dst.opcode = SQ_OP2_INST_MAX;
3520 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3521 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3522 pAsm->D.dst.reg = tmp1;
3523 pAsm->D.dst.writez = 1;
3524
3525 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3526 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3527 pAsm->S[0].src.reg = tmp1;
3528 noswizzle_PVSSRC(&(pAsm->S[0].src));
3529 pAsm->S[1].bits = pAsm->S[0].bits;
3530 flipneg_PVSSRC(&(pAsm->S[1].src));
3531
3532 next_ins(pAsm);
3533
3534 /* tmp1.z = RCP_e(|tmp1.z|) */
3535 pAsm->D.dst.opcode = SQ_OP2_INST_RECIP_IEEE;
3536 pAsm->D.dst.math = 1;
3537 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3538 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3539 pAsm->D.dst.reg = tmp1;
3540 pAsm->D.dst.writez = 1;
3541
3542 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3543 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3544 pAsm->S[0].src.reg = tmp1;
3545 pAsm->S[0].src.swizzlex = SQ_SEL_Z;
3546
3547 next_ins(pAsm);
3548
3549 /* MULADD R0.x, R0.x, PS1, (0x3FC00000, 1.5f).x
3550 * MULADD R0.y, R0.y, PS1, (0x3FC00000, 1.5f).x
3551 * muladd has no writemask, have to use another temp
3552 * also no support for imm constants, so add 1 here
3553 */
3554 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3555 pAsm->D.dst.op3 = 1;
3556 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3557 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3558 pAsm->D.dst.reg = tmp2;
3559
3560 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3561 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3562 pAsm->S[0].src.reg = tmp1;
3563 noswizzle_PVSSRC(&(pAsm->S[0].src));
3564 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3565 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3566 pAsm->S[1].src.reg = tmp1;
3567 setswizzle_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z);
3568 setaddrmode_PVSSRC(&(pAsm->S[2].src), ADDR_ABSOLUTE);
3569 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
3570 pAsm->S[2].src.reg = tmp1;
3571 setswizzle_PVSSRC(&(pAsm->S[2].src), SQ_SEL_1);
3572
3573 next_ins(pAsm);
3574
3575 /* ADD the remaining .5 */
3576 pAsm->D.dst.opcode = SQ_OP2_INST_ADD;
3577 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3578 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3579 pAsm->D.dst.reg = tmp2;
3580 pAsm->D.dst.writex = 1;
3581 pAsm->D.dst.writey = 1;
3582 pAsm->D.dst.writez = 0;
3583 pAsm->D.dst.writew = 0;
3584
3585 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3586 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3587 pAsm->S[0].src.reg = tmp2;
3588 noswizzle_PVSSRC(&(pAsm->S[0].src));
3589 setaddrmode_PVSSRC(&(pAsm->S[1].src), ADDR_ABSOLUTE);
3590 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3591 pAsm->S[1].src.reg = 252; // SQ_ALU_SRC_0_5
3592 noswizzle_PVSSRC(&(pAsm->S[1].src));
3593
3594 next_ins(pAsm);
3595
3596 /* tmp1.xy = temp2.xy */
3597 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3598 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3599 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3600 pAsm->D.dst.reg = tmp1;
3601 pAsm->D.dst.writex = 1;
3602 pAsm->D.dst.writey = 1;
3603 pAsm->D.dst.writez = 0;
3604 pAsm->D.dst.writew = 0;
3605
3606 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3607 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3608 pAsm->S[0].src.reg = tmp2;
3609 noswizzle_PVSSRC(&(pAsm->S[0].src));
3610
3611 next_ins(pAsm);
3612 pAsm->aArgSubst[1] = tmp1;
3613 need_barrier = GL_TRUE;
3614
3615 }
3616
3617 pAsm->D.dst.opcode = SQ_TEX_INST_SAMPLE;
3618 pAsm->is_tex = GL_TRUE;
3619 if ( GL_TRUE == need_barrier )
3620 {
3621 pAsm->need_tex_barrier = GL_TRUE;
3622 }
3623 // Set src1 to tex unit id
3624 pAsm->S[1].src.reg = pAsm->pILInst[pAsm->uiCurInst].TexSrcUnit;
3625 pAsm->S[1].src.rtype = SRC_REG_TEMPORARY;
3626
3627 //No sw info from mesa compiler, so hard code here.
3628 pAsm->S[1].src.swizzlex = SQ_SEL_X;
3629 pAsm->S[1].src.swizzley = SQ_SEL_Y;
3630 pAsm->S[1].src.swizzlez = SQ_SEL_Z;
3631 pAsm->S[1].src.swizzlew = SQ_SEL_W;
3632
3633 if( GL_FALSE == tex_dst(pAsm) )
3634 {
3635 return GL_FALSE;
3636 }
3637
3638 if( GL_FALSE == tex_src(pAsm) )
3639 {
3640 return GL_FALSE;
3641 }
3642
3643 if(pAsm->pILInst[pAsm->uiCurInst].Opcode == OPCODE_TXP)
3644 {
3645 /* hopefully did swizzles before */
3646 noswizzle_PVSSRC(&(pAsm->S[0].src));
3647 }
3648
3649 if(pAsm->pILInst[pAsm->uiCurInst].TexSrcTarget == TEXTURE_CUBE_INDEX)
3650 {
3651 /* SAMPLE dst, tmp.yxwy, CUBE */
3652 pAsm->S[0].src.swizzlex = SQ_SEL_Y;
3653 pAsm->S[0].src.swizzley = SQ_SEL_X;
3654 pAsm->S[0].src.swizzlez = SQ_SEL_W;
3655 pAsm->S[0].src.swizzlew = SQ_SEL_Y;
3656 }
3657
3658 if ( GL_FALSE == next_ins(pAsm) )
3659 {
3660 return GL_FALSE;
3661 }
3662
3663 return GL_TRUE;
3664 }
3665
3666 GLboolean assemble_XPD(r700_AssemblerBase *pAsm)
3667 {
3668 BITS tmp;
3669
3670 if( GL_FALSE == checkop2(pAsm) )
3671 {
3672 return GL_FALSE;
3673 }
3674
3675 tmp = gethelpr(pAsm);
3676
3677 pAsm->D.dst.opcode = SQ_OP2_INST_MUL;
3678
3679 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3680 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3681 pAsm->D.dst.reg = tmp;
3682 nomask_PVSDST(&(pAsm->D.dst));
3683
3684 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3685 {
3686 return GL_FALSE;
3687 }
3688
3689 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3690 {
3691 return GL_FALSE;
3692 }
3693
3694 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3695 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3696
3697 if( GL_FALSE == next_ins(pAsm) )
3698 {
3699 return GL_FALSE;
3700 }
3701
3702 pAsm->D.dst.opcode = SQ_OP3_INST_MULADD;
3703 pAsm->D.dst.op3 = 1;
3704
3705 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3706 {
3707 tmp = gethelpr(pAsm);
3708
3709 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
3710 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
3711 pAsm->D.dst.reg = tmp;
3712
3713 nomask_PVSDST(&(pAsm->D.dst));
3714 }
3715 else
3716 {
3717 if( GL_FALSE == assemble_dst(pAsm) )
3718 {
3719 return GL_FALSE;
3720 }
3721 }
3722
3723 if( GL_FALSE == assemble_src(pAsm, 0, -1) )
3724 {
3725 return GL_FALSE;
3726 }
3727
3728 if( GL_FALSE == assemble_src(pAsm, 1, -1) )
3729 {
3730 return GL_FALSE;
3731 }
3732
3733 swizzleagain_PVSSRC(&(pAsm->S[0].src), SQ_SEL_Y, SQ_SEL_Z, SQ_SEL_X, SQ_SEL_0);
3734 swizzleagain_PVSSRC(&(pAsm->S[1].src), SQ_SEL_Z, SQ_SEL_X, SQ_SEL_Y, SQ_SEL_0);
3735
3736 // result1 + (neg) result0
3737 setaddrmode_PVSSRC(&(pAsm->S[2].src),ADDR_ABSOLUTE);
3738 pAsm->S[2].src.rtype = SRC_REG_TEMPORARY;
3739 pAsm->S[2].src.reg = tmp;
3740
3741 neg_PVSSRC(&(pAsm->S[2].src));
3742 noswizzle_PVSSRC(&(pAsm->S[2].src));
3743
3744 if( GL_FALSE == next_ins(pAsm) )
3745 {
3746 return GL_FALSE;
3747 }
3748
3749
3750 if(0xF != pAsm->pILInst[pAsm->uiCurInst].DstReg.WriteMask)
3751 {
3752 if( GL_FALSE == assemble_dst(pAsm) )
3753 {
3754 return GL_FALSE;
3755 }
3756
3757 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
3758
3759 // Use tmp as source
3760 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
3761 pAsm->S[0].src.rtype = SRC_REG_TEMPORARY;
3762 pAsm->S[0].src.reg = tmp;
3763
3764 noneg_PVSSRC(&(pAsm->S[0].src));
3765 noswizzle_PVSSRC(&(pAsm->S[0].src));
3766
3767 if( GL_FALSE == next_ins(pAsm) )
3768 {
3769 return GL_FALSE;
3770 }
3771 }
3772
3773 return GL_TRUE;
3774 }
3775
3776 GLboolean assemble_EXPORT(r700_AssemblerBase *pAsm)
3777 {
3778 return GL_TRUE;
3779 }
3780
3781 GLboolean assemble_IF(r700_AssemblerBase *pAsm)
3782 {
3783 return GL_TRUE;
3784 }
3785
3786 GLboolean assemble_ENDIF(r700_AssemblerBase *pAsm)
3787 {
3788 return GL_TRUE;
3789 }
3790
3791 GLboolean AssembleInstr(GLuint uiNumberInsts,
3792 struct prog_instruction *pILInst,
3793 r700_AssemblerBase *pR700AsmCode)
3794 {
3795 GLuint i;
3796
3797 pR700AsmCode->pILInst = pILInst;
3798 for(i=0; i<uiNumberInsts; i++)
3799 {
3800 pR700AsmCode->uiCurInst = i;
3801
3802 switch (pILInst[i].Opcode)
3803 {
3804 case OPCODE_ABS:
3805 if ( GL_FALSE == assemble_ABS(pR700AsmCode) )
3806 return GL_FALSE;
3807 break;
3808 case OPCODE_ADD:
3809 case OPCODE_SUB:
3810 if ( GL_FALSE == assemble_ADD(pR700AsmCode) )
3811 return GL_FALSE;
3812 break;
3813
3814 case OPCODE_ARL:
3815 radeon_error("Not yet implemented instruction OPCODE_ARL \n");
3816 //if ( GL_FALSE == assemble_BAD("ARL") )
3817 return GL_FALSE;
3818 break;
3819 case OPCODE_ARR:
3820 radeon_error("Not yet implemented instruction OPCODE_ARR \n");
3821 //if ( GL_FALSE == assemble_BAD("ARR") )
3822 return GL_FALSE;
3823 break;
3824
3825 case OPCODE_CMP:
3826 if ( GL_FALSE == assemble_CMP(pR700AsmCode) )
3827 return GL_FALSE;
3828 break;
3829 case OPCODE_COS:
3830 if ( GL_FALSE == assemble_COS(pR700AsmCode) )
3831 return GL_FALSE;
3832 break;
3833
3834 case OPCODE_DP3:
3835 case OPCODE_DP4:
3836 case OPCODE_DPH:
3837 if ( GL_FALSE == assemble_DOT(pR700AsmCode) )
3838 return GL_FALSE;
3839 break;
3840
3841 case OPCODE_DST:
3842 if ( GL_FALSE == assemble_DST(pR700AsmCode) )
3843 return GL_FALSE;
3844 break;
3845
3846 case OPCODE_EX2:
3847 if ( GL_FALSE == assemble_EX2(pR700AsmCode) )
3848 return GL_FALSE;
3849 break;
3850 case OPCODE_EXP:
3851 radeon_error("Not yet implemented instruction OPCODE_EXP \n");
3852 //if ( GL_FALSE == assemble_BAD("EXP") )
3853 return GL_FALSE;
3854 break; // approx of EX2
3855
3856 case OPCODE_FLR:
3857 if ( GL_FALSE == assemble_FLR(pR700AsmCode) )
3858 return GL_FALSE;
3859 break;
3860 //case OP_FLR_INT:
3861 // if ( GL_FALSE == assemble_FLR_INT() )
3862 // return GL_FALSE;
3863 // break;
3864
3865 case OPCODE_FRC:
3866 if ( GL_FALSE == assemble_FRC(pR700AsmCode) )
3867 return GL_FALSE;
3868 break;
3869
3870 case OPCODE_KIL:
3871 if ( GL_FALSE == assemble_KIL(pR700AsmCode) )
3872 return GL_FALSE;
3873 break;
3874 case OPCODE_LG2:
3875 if ( GL_FALSE == assemble_LG2(pR700AsmCode) )
3876 return GL_FALSE;
3877 break;
3878 case OPCODE_LIT:
3879 if ( GL_FALSE == assemble_LIT(pR700AsmCode) )
3880 return GL_FALSE;
3881 break;
3882 case OPCODE_LRP:
3883 if ( GL_FALSE == assemble_LRP(pR700AsmCode) )
3884 return GL_FALSE;
3885 break;
3886 case OPCODE_LOG:
3887 radeon_error("Not yet implemented instruction OPCODE_LOG \n");
3888 //if ( GL_FALSE == assemble_BAD("LOG") )
3889 return GL_FALSE;
3890 break; // approx of LG2
3891
3892 case OPCODE_MAD:
3893 if ( GL_FALSE == assemble_MAD(pR700AsmCode) )
3894 return GL_FALSE;
3895 break;
3896 case OPCODE_MAX:
3897 if ( GL_FALSE == assemble_MAX(pR700AsmCode) )
3898 return GL_FALSE;
3899 break;
3900 case OPCODE_MIN:
3901 if ( GL_FALSE == assemble_MIN(pR700AsmCode) )
3902 return GL_FALSE;
3903 break;
3904
3905 case OPCODE_MOV:
3906 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3907 return GL_FALSE;
3908 break;
3909 case OPCODE_MUL:
3910 if ( GL_FALSE == assemble_MUL(pR700AsmCode) )
3911 return GL_FALSE;
3912 break;
3913
3914 case OPCODE_POW:
3915 if ( GL_FALSE == assemble_POW(pR700AsmCode) )
3916 return GL_FALSE;
3917 break;
3918 case OPCODE_RCP:
3919 if ( GL_FALSE == assemble_RCP(pR700AsmCode) )
3920 return GL_FALSE;
3921 break;
3922 case OPCODE_RSQ:
3923 if ( GL_FALSE == assemble_RSQ(pR700AsmCode) )
3924 return GL_FALSE;
3925 break;
3926 case OPCODE_SIN:
3927 if ( GL_FALSE == assemble_SIN(pR700AsmCode) )
3928 return GL_FALSE;
3929 break;
3930 case OPCODE_SCS:
3931 if ( GL_FALSE == assemble_SCS(pR700AsmCode) )
3932 return GL_FALSE;
3933 break;
3934
3935 case OPCODE_SGE:
3936 if ( GL_FALSE == assemble_SGE(pR700AsmCode) )
3937 return GL_FALSE;
3938 break;
3939 case OPCODE_SLT:
3940 if ( GL_FALSE == assemble_SLT(pR700AsmCode) )
3941 return GL_FALSE;
3942 break;
3943
3944 //case OP_STP:
3945 // if ( GL_FALSE == assemble_STP(pR700AsmCode) )
3946 // return GL_FALSE;
3947 // break;
3948
3949 case OPCODE_SWZ:
3950 if ( GL_FALSE == assemble_MOV(pR700AsmCode) )
3951 {
3952 return GL_FALSE;
3953 }
3954 else
3955 {
3956 if( (i+1)<uiNumberInsts )
3957 {
3958 if(OPCODE_END != pILInst[i+1].Opcode)
3959 {
3960 if( GL_TRUE == IsTex(pILInst[i+1].Opcode) )
3961 {
3962 pR700AsmCode->pInstDeps[i+1].nDstDep = i+1; //=1?
3963 }
3964 }
3965 }
3966 }
3967 break;
3968
3969 case OPCODE_TEX:
3970 case OPCODE_TXB:
3971 case OPCODE_TXP:
3972 if ( GL_FALSE == assemble_TEX(pR700AsmCode) )
3973 return GL_FALSE;
3974 break;
3975
3976 case OPCODE_XPD:
3977 if ( GL_FALSE == assemble_XPD(pR700AsmCode) )
3978 return GL_FALSE;
3979 break;
3980
3981 case OPCODE_IF :
3982 if ( GL_FALSE == assemble_IF(pR700AsmCode) )
3983 return GL_FALSE;
3984 break;
3985 case OPCODE_ELSE :
3986 radeon_error("Not yet implemented instruction OPCODE_ELSE \n");
3987 //if ( GL_FALSE == assemble_BAD("ELSE") )
3988 return GL_FALSE;
3989 break;
3990 case OPCODE_ENDIF:
3991 if ( GL_FALSE == assemble_ENDIF(pR700AsmCode) )
3992 return GL_FALSE;
3993 break;
3994
3995 //case OPCODE_EXPORT:
3996 // if ( GL_FALSE == assemble_EXPORT() )
3997 // return GL_FALSE;
3998 // break;
3999
4000 case OPCODE_END:
4001 //pR700AsmCode->uiCurInst = i;
4002 //This is to remaind that if in later exoort there is depth/stencil
4003 //export, we need a mov to re-arrange DST channel, where using a
4004 //psuedo inst, we will use this end inst to do it.
4005 return GL_TRUE;
4006
4007 default:
4008 radeon_error("internal: unknown instruction\n");
4009 return GL_FALSE;
4010 }
4011 }
4012
4013 return GL_TRUE;
4014 }
4015
4016 GLboolean Process_Export(r700_AssemblerBase* pAsm,
4017 GLuint type,
4018 GLuint export_starting_index,
4019 GLuint export_count,
4020 GLuint starting_register_number,
4021 GLboolean is_depth_export)
4022 {
4023 unsigned char ucWriteMask;
4024
4025 check_current_clause(pAsm, CF_EMPTY_CLAUSE);
4026 check_current_clause(pAsm, CF_EXPORT_CLAUSE); //alloc the cf_current_export_clause_ptr
4027
4028 pAsm->cf_current_export_clause_ptr->m_Word0.f.type = type;
4029
4030 switch (type)
4031 {
4032 case SQ_EXPORT_PIXEL:
4033 if(GL_TRUE == is_depth_export)
4034 {
4035 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_Z;
4036 }
4037 else
4038 {
4039 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_PIXEL_MRT0 + export_starting_index;
4040 }
4041 break;
4042
4043 case SQ_EXPORT_POS:
4044 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = SQ_CF_POS_0 + export_starting_index;
4045 break;
4046
4047 case SQ_EXPORT_PARAM:
4048 pAsm->cf_current_export_clause_ptr->m_Word0.f.array_base = 0x0 + export_starting_index;
4049 break;
4050
4051 default:
4052 radeon_error("Unknown export type: %d\n", type);
4053 return GL_FALSE;
4054 break;
4055 }
4056
4057 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_gpr = starting_register_number;
4058
4059 pAsm->cf_current_export_clause_ptr->m_Word0.f.rw_rel = SQ_ABSOLUTE;
4060 pAsm->cf_current_export_clause_ptr->m_Word0.f.index_gpr = 0x0;
4061 pAsm->cf_current_export_clause_ptr->m_Word0.f.elem_size = 0x3;
4062
4063 pAsm->cf_current_export_clause_ptr->m_Word1.f.burst_count = (export_count - 1);
4064 pAsm->cf_current_export_clause_ptr->m_Word1.f.end_of_program = 0x0;
4065 pAsm->cf_current_export_clause_ptr->m_Word1.f.valid_pixel_mode = 0x0;
4066 pAsm->cf_current_export_clause_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT; // _DONE
4067 pAsm->cf_current_export_clause_ptr->m_Word1.f.whole_quad_mode = 0x0;
4068 pAsm->cf_current_export_clause_ptr->m_Word1.f.barrier = 0x1;
4069
4070 if (export_count == 1)
4071 {
4072 ucWriteMask = pAsm->pucOutMask[starting_register_number - pAsm->starting_export_register_number];
4073 /* exports Z as a float into Red channel */
4074 if (GL_TRUE == is_depth_export)
4075 ucWriteMask = 0x1;
4076
4077 if( (ucWriteMask & 0x1) != 0)
4078 {
4079 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
4080 }
4081 else
4082 {
4083 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_MASK;
4084 }
4085 if( ((ucWriteMask>>1) & 0x1) != 0)
4086 {
4087 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
4088 }
4089 else
4090 {
4091 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_MASK;
4092 }
4093 if( ((ucWriteMask>>2) & 0x1) != 0)
4094 {
4095 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
4096 }
4097 else
4098 {
4099 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_MASK;
4100 }
4101 if( ((ucWriteMask>>3) & 0x1) != 0)
4102 {
4103 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
4104 }
4105 else
4106 {
4107 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_MASK;
4108 }
4109 }
4110 else
4111 {
4112 // This should only be used if all components for all registers have been written
4113 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_X;
4114 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_Y;
4115 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_Z;
4116 pAsm->cf_current_export_clause_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_W;
4117 }
4118
4119 pAsm->cf_last_export_ptr = pAsm->cf_current_export_clause_ptr;
4120
4121 return GL_TRUE;
4122 }
4123
4124 GLboolean Move_Depth_Exports_To_Correct_Channels(r700_AssemblerBase *pAsm, BITS depth_channel_select)
4125 {
4126 gl_inst_opcode Opcode_save = pAsm->pILInst[pAsm->uiCurInst].Opcode; //Should be OPCODE_END
4127 pAsm->pILInst[pAsm->uiCurInst].Opcode = OPCODE_MOV;
4128
4129 // MOV depth_export_register.hw_depth_channel, depth_export_register.depth_channel_select
4130
4131 pAsm->D.dst.opcode = SQ_OP2_INST_MOV;
4132
4133 setaddrmode_PVSDST(&(pAsm->D.dst), ADDR_ABSOLUTE);
4134 pAsm->D.dst.rtype = DST_REG_TEMPORARY;
4135 pAsm->D.dst.reg = pAsm->depth_export_register_number;
4136
4137 pAsm->D.dst.writex = 1; // depth goes in R channel for HW
4138
4139 setaddrmode_PVSSRC(&(pAsm->S[0].src), ADDR_ABSOLUTE);
4140 pAsm->S[0].src.rtype = DST_REG_TEMPORARY;
4141 pAsm->S[0].src.reg = pAsm->depth_export_register_number;
4142
4143 setswizzle_PVSSRC(&(pAsm->S[0].src), depth_channel_select);
4144
4145 noneg_PVSSRC(&(pAsm->S[0].src));
4146
4147 if( GL_FALSE == next_ins(pAsm) )
4148 {
4149 return GL_FALSE;
4150 }
4151
4152 pAsm->pILInst[pAsm->uiCurInst].Opcode = Opcode_save;
4153
4154 return GL_TRUE;
4155 }
4156
4157 GLboolean Process_Fragment_Exports(r700_AssemblerBase *pR700AsmCode,
4158 GLbitfield OutputsWritten)
4159 {
4160 unsigned int unBit;
4161
4162 if(pR700AsmCode->depth_export_register_number >= 0)
4163 {
4164 if( GL_FALSE == Move_Depth_Exports_To_Correct_Channels(pR700AsmCode, SQ_SEL_Z) ) // depth
4165 {
4166 return GL_FALSE;
4167 }
4168 }
4169
4170 unBit = 1 << FRAG_RESULT_COLOR;
4171 if(OutputsWritten & unBit)
4172 {
4173 if( GL_FALSE == Process_Export(pR700AsmCode,
4174 SQ_EXPORT_PIXEL,
4175 0,
4176 1,
4177 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_COLOR],
4178 GL_FALSE) )
4179 {
4180 return GL_FALSE;
4181 }
4182 }
4183 unBit = 1 << FRAG_RESULT_DEPTH;
4184 if(OutputsWritten & unBit)
4185 {
4186 if( GL_FALSE == Process_Export(pR700AsmCode,
4187 SQ_EXPORT_PIXEL,
4188 0,
4189 1,
4190 pR700AsmCode->uiFP_OutputMap[FRAG_RESULT_DEPTH],
4191 GL_TRUE))
4192 {
4193 return GL_FALSE;
4194 }
4195 }
4196
4197 if(pR700AsmCode->cf_last_export_ptr != NULL)
4198 {
4199 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4200 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4201 }
4202
4203 return GL_TRUE;
4204 }
4205
4206 GLboolean Process_Vertex_Exports(r700_AssemblerBase *pR700AsmCode,
4207 GLbitfield OutputsWritten)
4208 {
4209 unsigned int unBit;
4210 unsigned int i;
4211
4212 GLuint export_starting_index = 0;
4213 GLuint export_count = pR700AsmCode->number_of_exports;
4214
4215 unBit = 1 << VERT_RESULT_HPOS;
4216 if(OutputsWritten & unBit)
4217 {
4218 if( GL_FALSE == Process_Export(pR700AsmCode,
4219 SQ_EXPORT_POS,
4220 export_starting_index,
4221 1,
4222 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_HPOS],
4223 GL_FALSE) )
4224 {
4225 return GL_FALSE;
4226 }
4227
4228 export_count--;
4229
4230 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4231 }
4232
4233 pR700AsmCode->number_of_exports = export_count;
4234
4235 unBit = 1 << VERT_RESULT_COL0;
4236 if(OutputsWritten & unBit)
4237 {
4238 if( GL_FALSE == Process_Export(pR700AsmCode,
4239 SQ_EXPORT_PARAM,
4240 export_starting_index,
4241 1,
4242 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL0],
4243 GL_FALSE) )
4244 {
4245 return GL_FALSE;
4246 }
4247
4248 export_starting_index++;
4249 }
4250
4251 unBit = 1 << VERT_RESULT_COL1;
4252 if(OutputsWritten & unBit)
4253 {
4254 if( GL_FALSE == Process_Export(pR700AsmCode,
4255 SQ_EXPORT_PARAM,
4256 export_starting_index,
4257 1,
4258 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_COL1],
4259 GL_FALSE) )
4260 {
4261 return GL_FALSE;
4262 }
4263
4264 export_starting_index++;
4265 }
4266
4267 unBit = 1 << VERT_RESULT_FOGC;
4268 if(OutputsWritten & unBit)
4269 {
4270 if( GL_FALSE == Process_Export(pR700AsmCode,
4271 SQ_EXPORT_PARAM,
4272 export_starting_index,
4273 1,
4274 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_FOGC],
4275 GL_FALSE) )
4276 {
4277 return GL_FALSE;
4278 }
4279
4280 export_starting_index++;
4281 }
4282
4283 for(i=0; i<8; i++)
4284 {
4285 unBit = 1 << (VERT_RESULT_TEX0 + i);
4286 if(OutputsWritten & unBit)
4287 {
4288 if( GL_FALSE == Process_Export(pR700AsmCode,
4289 SQ_EXPORT_PARAM,
4290 export_starting_index,
4291 1,
4292 pR700AsmCode->ucVP_OutputMap[VERT_RESULT_TEX0 + i],
4293 GL_FALSE) )
4294 {
4295 return GL_FALSE;
4296 }
4297
4298 export_starting_index++;
4299 }
4300 }
4301
4302 // At least one param should be exported
4303 if (export_count)
4304 {
4305 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4306 }
4307 else
4308 {
4309 if( GL_FALSE == Process_Export(pR700AsmCode,
4310 SQ_EXPORT_PARAM,
4311 0,
4312 1,
4313 pR700AsmCode->starting_export_register_number,
4314 GL_FALSE) )
4315 {
4316 return GL_FALSE;
4317 }
4318
4319 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_x = SQ_SEL_0;
4320 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_y = SQ_SEL_0;
4321 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_z = SQ_SEL_0;
4322 pR700AsmCode->cf_last_export_ptr->m_Word1_SWIZ.f.sel_w = SQ_SEL_1;
4323 pR700AsmCode->cf_last_export_ptr->m_Word1.f.cf_inst = SQ_CF_INST_EXPORT_DONE;
4324 }
4325
4326 pR700AsmCode->cf_last_export_ptr->m_Word1.f.end_of_program = 0x1;
4327
4328 return GL_TRUE;
4329 }
4330
4331 GLboolean Clean_Up_Assembler(r700_AssemblerBase *pR700AsmCode)
4332 {
4333 FREE(pR700AsmCode->pucOutMask);
4334 FREE(pR700AsmCode->pInstDeps);
4335 return GL_TRUE;
4336 }
4337