1 /**************************************************************************
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions of the Software.
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 #include "main/glheader.h"
31 #include "main/macros.h"
32 #include "main/enums.h"
34 #include "tnl/t_context.h"
35 #include "intel_batchbuffer.h"
38 #include "i915_context.h"
39 #include "i915_program.h"
/* Field packers: each macro masks a ureg and shifts it into one of the
 * instruction fields, using the UREG_*_SHIFT_LEFT / UREG_*_SHIFT_RIGHT
 * amounts defined further down in this file.
 * A0_DEST, D0_DEST and T0_DEST all expand to the same expression.
 * Destination macros keep only the type and number bits
 * (UREG_TYPE_NR_MASK); source macros keep everything in UREG_MASK.
 * NOTE(review): the *_SHIFT_LEFT amounts are applied with a right shift and
 * the *_SHIFT_RIGHT amounts with a left shift -- presumably the names
 * describe where the ureg field sits relative to the target field; confirm.
 */
42 #define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
43 #define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
44 #define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
45 #define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
46 #define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
47 #define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
48 #define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
49 #define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)
51 /* These are special, and don't have swizzle/negate bits. */
/* T0_SAMPLER places the register number of a ureg at T0_SAMPLER_NR_SHIFT.
 * T1_ADDRESS_REG ORs together the register number of a ureg, shifted to
 * T1_ADDRESS_REG_NR_SHIFT, and its register type, shifted to
 * T1_ADDRESS_REG_TYPE_SHIFT.
 */
53 #define T0_SAMPLER( reg ) (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
54 #define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
55 (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))
58 /* Macros for translating UREG's into the various register fields used
59 * by the I915 programmable unit. */
/* Shift amounts and masks consumed by the field packer macros in this file.
 * Every shift amount is the difference between a ureg bit position and the
 * bit position of the matching A0/A1/A2 instruction field.
 * UREG_MASK keeps the upper 24 bits of a ureg.
 * UREG_TYPE_NR_MASK keeps only the register type and register number bits.
 * I915_CONSTFLAG_PARAM is the constant_flags value that i915_emit_param4fv
 * stores for a constant slot it hands to a parameter.
 */
61 #define UREG_A0_DEST_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
62 #define UREG_A0_SRC0_SHIFT_LEFT (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
63 #define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
64 #define UREG_A1_SRC1_SHIFT_LEFT (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
65 #define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
66 #define UREG_A2_SRC2_SHIFT_LEFT (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)
68 #define UREG_MASK 0xffffff00
69 #define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
70 (REG_NR_MASK << UREG_NR_SHIFT))
73 #define I915_CONSTFLAG_PARAM 0x1f
/* Allocate a temporary R register for program p.
 * ffs finds the lowest clear bit of p->temp_flag; that bit is then set and
 * the matching UREG(REG_TYPE_R, bit - 1) is returned.
 * An out of temporaries message goes to stderr on the failure path.
 * NOTE(review): lines of this definition appear to be missing here (return
 * type, braces, the test guarding the fprintf and whatever follows it) --
 * restore them from the upstream file before building.
 */
76 i915_get_temp(struct i915_fragment_program
*p
)
/* ffs is one-based, so a result of N refers to bit N - 1. */
78 int bit
= ffs(~p
->temp_flag
);
80 fprintf(stderr
, "%s: out of temporaries\n", __FILE__
);
84 p
->temp_flag
|= 1 << (bit
- 1);
85 return UREG(REG_TYPE_R
, (bit
- 1));
/* Allocate a U register (the utemp pool) for program p.
 * ffs finds the lowest clear bit of p->utemp_flag; that bit is then set and
 * the matching UREG(REG_TYPE_U, bit - 1) is returned.
 * An out of temporaries message goes to stderr on the failure path.
 * NOTE(review): lines of this definition appear to be missing here (return
 * type, braces, the test guarding the fprintf and whatever follows it) --
 * restore them from the upstream file before building.
 */
90 i915_get_utemp(struct i915_fragment_program
* p
)
/* ffs is one-based, so a result of N refers to bit N - 1. */
92 int bit
= ffs(~p
->utemp_flag
);
94 fprintf(stderr
, "%s: out of temporaries\n", __FILE__
);
98 p
->utemp_flag
|= 1 << (bit
- 1);
99 return UREG(REG_TYPE_U
, (bit
- 1));
/* Release every U register held by program p.
 * p->utemp_flag is reset to ~0x7, the same value i915_init_program assigns.
 * Only the three lowest bits are clear afterwards, and i915_get_utemp hands
 * out clear bits, so three U registers become available again.
 * NOTE(review): the return type and braces are not visible here.
 */
103 i915_release_utemps(struct i915_fragment_program
*p
)
105 p
->utemp_flag
= ~0x7;
/* Append a declaration for register (type, nr) to the p->decl stream.
 *
 * p         program being built
 * type, nr  register type and number, combined with UREG()
 * d0_flags  extra bits ORed into the first declaration dword
 *
 * For REG_TYPE_T and REG_TYPE_S the per-register bit in p->decl_t or
 * p->decl_s is tested and then set, so these registers are tracked as
 * declared. Three dwords are written: D0_DCL | D0_DEST(reg) | d0_flags,
 * then D1_MBZ, then D2_MBZ. The assert checks that p->decl has not moved
 * past the end of p->declarations.
 * NOTE(review): the statements run when the bit is already set, the final
 * else branch, the return statement and the braces are not visible here --
 * restore them from the upstream file.
 */
110 i915_emit_decl(struct i915_fragment_program
*p
,
111 GLuint type
, GLuint nr
, GLuint d0_flags
)
113 GLuint reg
= UREG(type
, nr
);
/* Texture coordinate registers: tracked in the decl_t bitmask. */
115 if (type
== REG_TYPE_T
) {
116 if (p
->decl_t
& (1 << nr
))
119 p
->decl_t
|= (1 << nr
);
/* Sampler registers: tracked in the decl_s bitmask. */
121 else if (type
== REG_TYPE_S
) {
122 if (p
->decl_s
& (1 << nr
))
125 p
->decl_s
|= (1 << nr
);
/* One declaration is three dwords. */
130 *(p
->decl
++) = (D0_DCL
| D0_DEST(reg
) | d0_flags
);
131 *(p
->decl
++) = D1_MBZ
;
132 *(p
->decl
++) = D2_MBZ
;
133 assert(p
->decl
<= p
->declarations
+ ARRAY_SIZE(p
->declarations
));
/* Append one three-dword arithmetic instruction to the p->csr stream.
 *
 * The destination must not be a constant register (assert) and is reduced
 * to its register type and number. Each of src0, src1 and src2 is tested
 * for REG_TYPE_CONST. When constant sources name different constant
 * registers, the visible loop compares each one with the first and starts
 * an A0_MOV into a fresh U register from i915_get_utemp; p->utemp_flag is
 * saved before and restored after.
 * If p->csr has reached the end of p->program, i915_program_error is
 * called with a too many instructions message. Otherwise the three dwords
 * are built with the A0_ / A1_ / A2_ packer macros. For an R destination
 * the current p->nr_tex_indirect is stored in p->register_phases.
 * NOTE(review): the parameters op, dest and mask, the locals c and
 * nr_const, the bodies of the three constant tests, the tail of the
 * recursive call, the return statement and the braces are not visible
 * here -- restore them from the upstream file.
 */
140 i915_emit_arith(struct i915_fragment_program
* p
,
144 GLuint saturate
, GLuint src0
, GLuint src1
, GLuint src2
)
149 assert(GET_UREG_TYPE(dest
) != REG_TYPE_CONST
);
/* Keep only register type and number of the destination. */
150 dest
= UREG(GET_UREG_TYPE(dest
), GET_UREG_NR(dest
));
153 if (GET_UREG_TYPE(src0
) == REG_TYPE_CONST
)
155 if (GET_UREG_TYPE(src1
) == REG_TYPE_CONST
)
157 if (GET_UREG_TYPE(src2
) == REG_TYPE_CONST
)
160 /* Recursively call this function to MOV additional const values
161 * into temporary registers. Use utemp registers for this -
162 * currently shouldn't be possible to run out, but keep an eye on
166 GLuint s
[3], first
, i
, old_utemp_flag
;
171 old_utemp_flag
= p
->utemp_flag
;
173 first
= GET_UREG_NR(s
[c
[0]]);
174 for (i
= 1; i
< nr_const
; i
++) {
175 if (GET_UREG_NR(s
[c
[i
]]) != first
) {
176 GLuint tmp
= i915_get_utemp(p
);
178 i915_emit_arith(p
, A0_MOV
, tmp
, A0_DEST_CHANNEL_ALL
, 0,
187 p
->utemp_flag
= old_utemp_flag
; /* restore */
/* Guard against writing past the end of p->program. */
190 if (p
->csr
>= p
->program
+ ARRAY_SIZE(p
->program
)) {
191 i915_program_error(p
, "Program contains too many instructions");
195 *(p
->csr
++) = (op
| A0_DEST(dest
) | mask
| saturate
| A0_SRC0(src0
));
196 *(p
->csr
++) = (A1_SRC0(src0
) | A1_SRC1(src1
));
197 *(p
->csr
++) = (A2_SRC1(src1
) | A2_SRC2(src2
));
/* Record the current nr_tex_indirect value for the written R register. */
199 if (GET_UREG_TYPE(dest
) == REG_TYPE_R
)
200 p
->register_phases
[GET_UREG_NR(dest
)] = p
->nr_tex_indirect
;
/* Pick an R register that is not marked in live_regs.
 * ffs finds the lowest clear bit of live_regs and the function returns
 * UREG(REG_TYPE_R, bit - 1). On the failure path i915_program_error reports
 * that no free R register could be found.
 * NOTE(review): the live_regs parameter declaration, the failure test, the
 * value returned on failure and the braces are not visible here. Callers
 * in this file compare the result with UREG_BAD, so that is presumably the
 * failure value -- confirm.
 */
206 static GLuint
get_free_rreg (struct i915_fragment_program
*p
,
209 int bit
= ffs(~live_regs
);
211 i915_program_error(p
, "Can't find free R reg");
214 return UREG(REG_TYPE_R
, bit
- 1);
/* Append one three-dword texture instruction to the p->csr stream.
 *
 * Visible steps, in order:
 *  - If coord carries more than a bare register type and number, a free R
 *    register is taken with get_free_rreg and coord is copied into it with
 *    A0_MOV.
 *  - If destmask is not A0_DEST_CHANNEL_ALL, the lookup is redone through a
 *    recursive call that writes all channels of a U register, followed by
 *    an A0_MOV into dest using destmask.
 *  - Asserts: dest is not a constant register, dest is a bare register, and
 *    coord is not a U register.
 *  - If coord is none of R, OC, OD or T, it is copied into a free R register.
 *  - p->nr_tex_indirect is incremented when dest is OC or OD, and again
 *    when coord is an R register whose p->register_phases entry equals the
 *    current p->nr_tex_indirect.
 *  - If p->csr has reached the end of p->program, i915_program_error is
 *    called with a too many instructions message.
 *  - The instruction dwords are written; T1_ADDRESS_REG(coord) and T2_MBZ
 *    are the second and third.
 *  - For an R destination the current p->nr_tex_indirect is stored in
 *    p->register_phases.
 * NOTE(review): most of the parameter list, the first instruction dword,
 * the reassignments of coord, the failure returns, the final return and
 * the braces are not visible here. Several comments inside the body have
 * also lost their closing delimiter. Restore all of these from the
 * upstream file before building.
 */
217 GLuint
i915_emit_texld( struct i915_fragment_program
*p
,
225 if (coord
!= UREG(GET_UREG_TYPE(coord
), GET_UREG_NR(coord
))) {
226 /* With the help of the "needed registers" table created earlier, pick
227 * a register we can MOV the swizzled TC to (since TEX doesn't support
228 * swizzled sources) */
229 GLuint swizCoord
= get_free_rreg(p
, live_regs
);
230 if (swizCoord
== UREG_BAD
)
233 i915_emit_arith( p
, A0_MOV
, swizCoord
, A0_DEST_CHANNEL_ALL
, 0, coord
, 0, 0 );
237 /* Don't worry about saturate as we only support texture formats
238 * that are always in the 0..1 range.
240 if (destmask
!= A0_DEST_CHANNEL_ALL
) {
241 GLuint tmp
= i915_get_utemp(p
);
242 i915_emit_texld( p
, 0, tmp
, A0_DEST_CHANNEL_ALL
, sampler
, coord
, op
);
243 i915_emit_arith( p
, A0_MOV
, dest
, destmask
, 0, tmp
, 0, 0 );
247 assert(GET_UREG_TYPE(dest
) != REG_TYPE_CONST
);
248 assert(dest
== UREG(GET_UREG_TYPE(dest
), GET_UREG_NR(dest
)));
249 /* Can't use unsaved temps for coords, as the phase boundary would result
250 * in the contents becoming undefined.
252 assert(GET_UREG_TYPE(coord
) != REG_TYPE_U
);
254 if ((GET_UREG_TYPE(coord
) != REG_TYPE_R
) &&
255 (GET_UREG_TYPE(coord
) != REG_TYPE_OC
) &&
256 (GET_UREG_TYPE(coord
) != REG_TYPE_OD
) &&
257 (GET_UREG_TYPE(coord
) != REG_TYPE_T
)) {
258 GLuint tmpCoord
= get_free_rreg(p
, live_regs
);
260 if (tmpCoord
== UREG_BAD
)
263 i915_emit_arith(p
, A0_MOV
, tmpCoord
, A0_DEST_CHANNEL_ALL
, 0, coord
, 0, 0);
267 /* Output register being oC or oD defines a phase boundary */
268 if (GET_UREG_TYPE(dest
) == REG_TYPE_OC
||
269 GET_UREG_TYPE(dest
) == REG_TYPE_OD
)
270 p
->nr_tex_indirect
++;
272 /* Reading from an r# register whose contents depend on output of the
273 * current phase defines a phase boundary.
275 if (GET_UREG_TYPE(coord
) == REG_TYPE_R
&&
276 p
->register_phases
[GET_UREG_NR(coord
)] == p
->nr_tex_indirect
)
277 p
->nr_tex_indirect
++;
279 if (p
->csr
>= p
->program
+ ARRAY_SIZE(p
->program
)) {
280 i915_program_error(p
, "Program contains too many instructions");
286 T0_SAMPLER( sampler
));
288 *(p
->csr
++) = T1_ADDRESS_REG( coord
);
289 *(p
->csr
++) = T2_MBZ
;
291 if (GET_UREG_TYPE(dest
) == REG_TYPE_R
)
292 p
->register_phases
[GET_UREG_NR(dest
)] = p
->nr_tex_indirect
;
/* Return a ureg whose X channel reads the scalar constant c0.
 *
 * Two early returns hand back R0 swizzled to all ZERO or all ONE, so those
 * cases consume no constant slot. Otherwise every constant register is
 * scanned, with a test for slots flagged I915_CONSTFLAG_PARAM. Within a
 * register the first component that is either unused or already equal to
 * c0 is taken: the value is stored, its bit is set in constant_flags,
 * p->nr_constants is raised to cover the register, and the result selects
 * that component for X with ZERO, ZERO, ONE in the other channels.
 * When nothing fits, an out of constants message goes to stderr.
 * NOTE(review): the return type, the locals reg and idx, the conditions
 * guarding the two early returns, the statement under the parameter test,
 * the failure return and the braces are not visible here -- restore them
 * from the upstream file.
 */
301 i915_emit_const1f(struct i915_fragment_program
* p
, GLfloat c0
)
306 return swizzle(UREG(REG_TYPE_R
, 0), ZERO
, ZERO
, ZERO
, ZERO
);
308 return swizzle(UREG(REG_TYPE_R
, 0), ONE
, ONE
, ONE
, ONE
);
310 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
311 if (p
->constant_flags
[reg
] == I915_CONSTFLAG_PARAM
)
313 for (idx
= 0; idx
< 4; idx
++) {
/* Take a free component, or share one that already holds c0. */
314 if (!(p
->constant_flags
[reg
] & (1 << idx
)) ||
315 p
->constant
[reg
][idx
] == c0
) {
316 p
->constant
[reg
][idx
] = c0
;
317 p
->constant_flags
[reg
] |= 1 << idx
;
318 if (reg
+ 1 > p
->nr_constants
)
319 p
->nr_constants
= reg
+ 1;
320 return swizzle(UREG(REG_TYPE_CONST
, reg
), idx
, ZERO
, ZERO
, ONE
);
325 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
/* Return a ureg whose X and Y channels read the constants c0 and c1.
 *
 * Four early returns delegate to i915_emit_const1f for the other value and
 * put ZERO or ONE in the X or Y channel of the swizzle. Otherwise every
 * constant register is scanned, with a test for slots that are full (0xf)
 * or flagged I915_CONSTFLAG_PARAM. Within a register the first pair of
 * adjacent unused components (idx, idx + 1) is taken: c0 and c1 are
 * stored, both bits are set in constant_flags, p->nr_constants is raised
 * to cover the register, and the result selects idx and idx + 1 for X, Y.
 * When nothing fits, an out of constants message goes to stderr.
 * NOTE(review): the return type, the locals reg and idx, the conditions
 * guarding the four early returns, the statement under the full/parameter
 * test, the tail of the final swizzle call, the failure return and the
 * braces are not visible here -- restore them from the upstream file.
 */
331 i915_emit_const2f(struct i915_fragment_program
* p
, GLfloat c0
, GLfloat c1
)
336 return swizzle(i915_emit_const1f(p
, c1
), ZERO
, X
, Z
, W
);
338 return swizzle(i915_emit_const1f(p
, c1
), ONE
, X
, Z
, W
);
341 return swizzle(i915_emit_const1f(p
, c0
), X
, ZERO
, Z
, W
);
343 return swizzle(i915_emit_const1f(p
, c0
), X
, ONE
, Z
, W
);
345 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
346 if (p
->constant_flags
[reg
] == 0xf ||
347 p
->constant_flags
[reg
] == I915_CONSTFLAG_PARAM
)
349 for (idx
= 0; idx
< 3; idx
++) {
/* 3 << idx covers components idx and idx + 1; both must be unused. */
350 if (!(p
->constant_flags
[reg
] & (3 << idx
))) {
351 p
->constant
[reg
][idx
] = c0
;
352 p
->constant
[reg
][idx
+ 1] = c1
;
353 p
->constant_flags
[reg
] |= 3 << idx
;
354 if (reg
+ 1 > p
->nr_constants
)
355 p
->nr_constants
= reg
+ 1;
356 return swizzle(UREG(REG_TYPE_CONST
, reg
), idx
, idx
+ 1, ZERO
,
362 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
/* Return a constant register holding the four values c0, c1, c2, c3.
 *
 * Every constant register is scanned in order. A register whose flags are
 * 0xf and whose four components already equal the requested values is
 * reused. Otherwise the first register whose flags are 0 is filled with
 * the values and flagged 0xf, and p->nr_constants is raised to cover it.
 * Either way the result is UREG(REG_TYPE_CONST, reg) with no swizzle
 * applied. When nothing fits, an out of constants message goes to stderr.
 * NOTE(review): the return type, the local reg, the failure return and the
 * braces are not visible here -- restore them from the upstream file.
 */
370 i915_emit_const4f(struct i915_fragment_program
* p
,
371 GLfloat c0
, GLfloat c1
, GLfloat c2
, GLfloat c3
)
375 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
/* Reuse an identical, fully populated constant. */
376 if (p
->constant_flags
[reg
] == 0xf &&
377 p
->constant
[reg
][0] == c0
&&
378 p
->constant
[reg
][1] == c1
&&
379 p
->constant
[reg
][2] == c2
&& p
->constant
[reg
][3] == c3
) {
380 return UREG(REG_TYPE_CONST
, reg
);
/* Otherwise claim the first completely unused register. */
382 else if (p
->constant_flags
[reg
] == 0) {
383 p
->constant
[reg
][0] = c0
;
384 p
->constant
[reg
][1] = c1
;
385 p
->constant
[reg
][2] = c2
;
386 p
->constant
[reg
][3] = c3
;
387 p
->constant_flags
[reg
] = 0xf;
388 if (reg
+ 1 > p
->nr_constants
)
389 p
->nr_constants
= reg
+ 1;
390 return UREG(REG_TYPE_CONST
, reg
);
394 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
/* Array form of i915_emit_const4f: forwards c[0] through c[3] and returns
 * whatever that call returns. c must point at four or more readable floats.
 * NOTE(review): the return type and braces are not visible here.
 */
401 i915_emit_const4fv(struct i915_fragment_program
* p
, const GLfloat
* c
)
403 return i915_emit_const4f(p
, c
[0], c
[1], c
[2], c
[3]);
/* Bind the float array at values to a constant register and return it.
 *
 * The existing p->param entries are searched first; an entry whose values
 * pointer is the same address yields its recorded register. Matching is by
 * pointer, not by contents. Otherwise the first constant register whose
 * flags are 0 is flagged I915_CONSTFLAG_PARAM, the pointer and register
 * number are recorded in p->param[i], p->params_uptodate is cleared, and
 * p->nr_constants is raised to cover the register.
 * When nothing fits, an out of constants message goes to stderr.
 * NOTE(review): the return type, the locals reg and i, the statement that
 * chooses i for the new p->param entry, the failure return and the braces
 * are not visible here -- restore them from the upstream file.
 */
408 i915_emit_param4fv(struct i915_fragment_program
* p
, const GLfloat
* values
)
412 for (i
= 0; i
< p
->nr_params
; i
++) {
413 if (p
->param
[i
].values
== values
)
414 return UREG(REG_TYPE_CONST
, p
->param
[i
].reg
);
418 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
419 if (p
->constant_flags
[reg
] == 0) {
420 p
->constant_flags
[reg
] = I915_CONSTFLAG_PARAM
;
423 p
->param
[i
].values
= values
;
424 p
->param
[i
].reg
= reg
;
425 p
->params_uptodate
= 0;
427 if (reg
+ 1 > p
->nr_constants
)
428 p
->nr_constants
= reg
+ 1;
429 return UREG(REG_TYPE_CONST
, reg
);
433 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
438 /* Warning the user about program errors seems to be quite valuable, from
439 * our bug reports. It unfortunately means piglit reporting errors
440 * when we fall back to software due to an unsupportable program, though. */
/* Report a problem with program p using a printf-style format.
 *
 * p    program the error belongs to
 * fmt  printf-style format string, followed by its arguments
 *
 * When INTEL_DEBUG has DEBUG_WM or DEBUG_FALLBACKS set, the message is
 * written to stderr: a fixed prefix, the formatted text via vfprintf, and a
 * newline.
 * NOTE(review): the return type, the va_list declaration with its
 * va_start and va_end calls, anything done with p, and the braces are not
 * visible here -- restore them from the upstream file.
 */
443 i915_program_error(struct i915_fragment_program
*p
, const char *fmt
, ...)
445 if (unlikely((INTEL_DEBUG
& (DEBUG_WM
| DEBUG_FALLBACKS
)) != 0)) {
448 fprintf(stderr
, "i915_program_error: ");
450 vfprintf(stderr
, fmt
, args
);
453 fprintf(stderr
, "\n");
/* Reset program p so that a fresh translation can be emitted into it.
 *
 * Visible resets: params_uptodate to 0, register_phases zeroed,
 * nr_tex_indirect to 1, constant_flags zeroed, the decl cursor back to the
 * start of declarations, the temp_flag and utemp_flag allocation masks,
 * and depth_written to 0. The first declaration dword is then set to
 * _3DSTATE_PIXEL_SHADER_PROGRAM; i915_fini_program later ORs a length
 * into that dword.
 * NOTE(review): the return type, the uses of ctx, any other field resets
 * and the braces are not visible here -- restore them from the upstream
 * file.
 */
460 i915_init_program(struct i915_context
*i915
, struct i915_fragment_program
*p
)
462 struct gl_context
*ctx
= &i915
->intel
.ctx
;
465 p
->params_uptodate
= 0;
469 memset(&p
->register_phases
, 0, sizeof(p
->register_phases
));
470 p
->nr_tex_indirect
= 1;
476 memset(p
->constant_flags
, 0, sizeof(p
->constant_flags
));
480 p
->decl
= p
->declarations
;
/* Set bits mean in use for the ffs based allocators in this file.
 * NOTE(review): 0xffff000 has seven hex digits, so bits 12-27 are set and
 * bits 0-11 and 28-31 stay clear -- confirm this is the intended mask
 * rather than 0xffff0000.
 */
483 p
->temp_flag
= 0xffff000;
484 p
->utemp_flag
= ~0x7;
486 p
->depth_written
= 0;
489 *(p
->decl
++) = _3DSTATE_PIXEL_SHADER_PROGRAM
;
/* Finish building program p: check limits and publish instruction counts.
 *
 * program_size and decl_size are the element counts written so far to
 * p->program and p->declarations. Four counters (nr_tex_indirect,
 * nr_tex_insn, nr_alu_insn, nr_decl_insn) are compared with their
 * I915_MAX_* limits, and i915_program_error is called for each one that
 * is exceeded. The NumNative* fields of p->FragProg.Base are assigned
 * twice in the visible text, once to 0 and once from the counters.
 * Finally program_size + decl_size - 2 is ORed into the first declaration
 * dword.
 * NOTE(review): the return type, the condition that chooses between the
 * zeroed and the counted NumNative* values, the second half of the first
 * error format string, the rest of the NumNativeInstructions sum and the
 * braces are not visible here -- restore them from the upstream file.
 */
494 i915_fini_program(struct i915_fragment_program
*p
)
496 GLuint program_size
= p
->csr
- p
->program
;
497 GLuint decl_size
= p
->decl
- p
->declarations
;
499 if (p
->nr_tex_indirect
> I915_MAX_TEX_INDIRECT
) {
500 i915_program_error(p
, "Exceeded max nr indirect texture lookups "
502 p
->nr_tex_indirect
, I915_MAX_TEX_INDIRECT
);
505 if (p
->nr_tex_insn
> I915_MAX_TEX_INSN
) {
506 i915_program_error(p
, "Exceeded max TEX instructions (%d out of %d)",
507 p
->nr_tex_insn
, I915_MAX_TEX_INSN
);
510 if (p
->nr_alu_insn
> I915_MAX_ALU_INSN
)
511 i915_program_error(p
, "Exceeded max ALU instructions (%d out of %d)",
512 p
->nr_alu_insn
, I915_MAX_ALU_INSN
);
514 if (p
->nr_decl_insn
> I915_MAX_DECL_INSN
) {
515 i915_program_error(p
, "Exceeded max DECL instructions (%d out of %d)",
516 p
->nr_decl_insn
, I915_MAX_DECL_INSN
);
520 p
->FragProg
.Base
.NumNativeInstructions
= 0;
521 p
->FragProg
.Base
.NumNativeAluInstructions
= 0;
522 p
->FragProg
.Base
.NumNativeTexInstructions
= 0;
523 p
->FragProg
.Base
.NumNativeTexIndirections
= 0;
526 p
->FragProg
.Base
.NumNativeInstructions
= (p
->nr_alu_insn
+
529 p
->FragProg
.Base
.NumNativeAluInstructions
= p
->nr_alu_insn
;
530 p
->FragProg
.Base
.NumNativeTexInstructions
= p
->nr_tex_insn
;
531 p
->FragProg
.Base
.NumNativeTexIndirections
= p
->nr_tex_indirect
;
/* Patch the length into the header dword written by i915_init_program. */
534 p
->declarations
[0] |= program_size
+ decl_size
- 2;
/* Copy the finished program p and its constants into i915->state.
 *
 * Program: if the cached ProgramSize differs from program_size + decl_size,
 * or memcmp finds a difference between the cached instruction part and
 * p->program, then I915_UPLOAD_PROGRAM is flagged with I915_STATECHANGE,
 * the declarations and then the instructions are copied into
 * i915->state.Program, and ProgramSize is updated.
 * NOTE(review): the memcmp covers only the instruction part, not the
 * declarations -- confirm a declaration-only change cannot be missed.
 *
 * Constants: when p->nr_constants is nonzero, I915_UPLOAD_CONSTANTS is
 * activated and flagged as changed. Constant[0] is the
 * _3DSTATE_PIXEL_SHADER_CONSTANTS header ORed with nr * 4, Constant[1] is
 * a mask with the low nr bits set, the constant values follow from
 * Constant[2] on, and ConstantSize is 2 + nr * 4. A loop prints each
 * constant to stderr. A call that deactivates I915_UPLOAD_CONSTANTS is
 * also visible at the end.
 * NOTE(review): the return type, the local i, the guard around the debug
 * print loop, the else that presumably precedes the final
 * I915_ACTIVESTATE call, and the braces are not visible here. Two comments
 * inside the body have also lost their closing delimiter. Restore all of
 * these from the upstream file before building.
 */
538 i915_upload_program(struct i915_context
*i915
,
539 struct i915_fragment_program
*p
)
541 GLuint program_size
= p
->csr
- p
->program
;
542 GLuint decl_size
= p
->decl
- p
->declarations
;
547 /* Could just go straight to the batchbuffer from here:
549 if (i915
->state
.ProgramSize
!= (program_size
+ decl_size
) ||
550 memcmp(i915
->state
.Program
+ decl_size
, p
->program
,
551 program_size
* sizeof(int)) != 0) {
552 I915_STATECHANGE(i915
, I915_UPLOAD_PROGRAM
);
553 memcpy(i915
->state
.Program
, p
->declarations
, decl_size
* sizeof(int));
554 memcpy(i915
->state
.Program
+ decl_size
, p
->program
,
555 program_size
* sizeof(int));
556 i915
->state
.ProgramSize
= decl_size
+ program_size
;
559 /* Always seemed to get a failure if I used memcmp() to
560 * shortcircuit this state upload. Needs further investigation?
562 if (p
->nr_constants
) {
563 GLuint nr
= p
->nr_constants
;
565 I915_ACTIVESTATE(i915
, I915_UPLOAD_CONSTANTS
, 1);
566 I915_STATECHANGE(i915
, I915_UPLOAD_CONSTANTS
);
568 i915
->state
.Constant
[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS
| ((nr
) * 4);
569 i915
->state
.Constant
[1] = (1 << (nr
- 1)) | ((1 << (nr
- 1)) - 1);
571 memcpy(&i915
->state
.Constant
[2], p
->constant
, 4 * sizeof(int) * (nr
));
572 i915
->state
.ConstantSize
= 2 + (nr
) * 4;
576 for (i
= 0; i
< nr
; i
++) {
577 fprintf(stderr
, "const[%d]: %f %f %f %f\n", i
,
579 p
->constant
[i
][1], p
->constant
[i
][2], p
->constant
[i
][3]);
584 I915_ACTIVESTATE(i915
, I915_UPLOAD_CONSTANTS
, 0);