1 /**************************************************************************
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
34 #include "tnl/t_context.h"
35 #include "intel_batchbuffer.h"
38 #include "i915_context.h"
39 #include "i915_program.h"
/* Macros for translating UREGs into the various register fields used
 * by the I915 programmable unit.
 *
 * Each distance below is the difference between a field's position in
 * the UREG encoding and its position in the instruction dword.  Note
 * that, despite the names, the *_SHIFT_LEFT distances are applied with
 * '>>' and the *_SHIFT_RIGHT distances with '<<' by the packing macros
 * further down.
 */
#define UREG_A0_DEST_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_DEST_TYPE_SHIFT)
#define UREG_A0_SRC0_SHIFT_LEFT  (UREG_TYPE_SHIFT - A0_SRC0_TYPE_SHIFT)
#define UREG_A1_SRC0_SHIFT_RIGHT (A1_SRC0_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
#define UREG_A1_SRC1_SHIFT_LEFT  (UREG_TYPE_SHIFT - A1_SRC1_TYPE_SHIFT)
#define UREG_A2_SRC1_SHIFT_RIGHT (A2_SRC1_CHANNEL_W_SHIFT - UREG_CHANNEL_W_SHIFT)
#define UREG_A2_SRC2_SHIFT_LEFT  (UREG_TYPE_SHIFT - A2_SRC2_TYPE_SHIFT)

/* UREG_MASK keeps everything except the low byte of a ureg;
 * UREG_TYPE_NR_MASK keeps only the register type and number fields.
 */
#define UREG_MASK         0xffffff00
#define UREG_TYPE_NR_MASK ((REG_TYPE_MASK << UREG_TYPE_SHIFT) | \
                           (REG_NR_MASK << UREG_NR_SHIFT))

/* Destination fields: register type and number only.  D0 and T0 reuse
 * the A0 distance -- presumably their destination fields sit at the
 * same bit position; verify against the register header.
 */
#define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
#define D0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)
#define T0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT)

/* Source operands.  src0 is split across dwords A0/A1 and src1 across
 * A1/A2, hence two macros for each of them.
 */
#define A0_SRC0( reg ) (((reg)&UREG_MASK)>>UREG_A0_SRC0_SHIFT_LEFT)
#define A1_SRC0( reg ) (((reg)&UREG_MASK)<<UREG_A1_SRC0_SHIFT_RIGHT)
#define A1_SRC1( reg ) (((reg)&UREG_MASK)>>UREG_A1_SRC1_SHIFT_LEFT)
#define A2_SRC1( reg ) (((reg)&UREG_MASK)<<UREG_A2_SRC1_SHIFT_RIGHT)
#define A2_SRC2( reg ) (((reg)&UREG_MASK)>>UREG_A2_SRC2_SHIFT_LEFT)

/* These are special, and don't have swizzle/negate bits.
 */
#define T0_SAMPLER( reg )     (GET_UREG_NR(reg)<<T0_SAMPLER_NR_SHIFT)
#define T1_ADDRESS_REG( reg ) ((GET_UREG_NR(reg)<<T1_ADDRESS_REG_NR_SHIFT) | \
                               (GET_UREG_TYPE(reg)<<T1_ADDRESS_REG_TYPE_SHIFT))

/* constant_flags[] value marking a constant register as reserved for a
 * program parameter (see i915_emit_param4fv) rather than for literals.
 */
#define I915_CONSTFLAG_PARAM 0x1f
75 GLuint
i915_get_temp( struct i915_fragment_program
*p
)
77 int bit
= ffs( ~p
->temp_flag
);
79 fprintf(stderr
, "%s: out of temporaries\n", __FILE__
);
83 p
->temp_flag
|= 1<<(bit
-1);
84 return UREG(REG_TYPE_R
, (bit
-1));
88 GLuint
i915_get_utemp( struct i915_fragment_program
*p
)
90 int bit
= ffs( ~p
->utemp_flag
);
92 fprintf(stderr
, "%s: out of temporaries\n", __FILE__
);
96 p
->utemp_flag
|= 1<<(bit
-1);
97 return UREG(REG_TYPE_U
, (bit
-1));
100 void i915_release_utemps( struct i915_fragment_program
*p
)
102 p
->utemp_flag
= ~0x7;
106 GLuint
i915_emit_decl( struct i915_fragment_program
*p
,
107 GLuint type
, GLuint nr
, GLuint d0_flags
)
109 GLuint reg
= UREG(type
, nr
);
111 if (type
== REG_TYPE_T
) {
112 if (p
->decl_t
& (1<<nr
))
115 p
->decl_t
|= (1<<nr
);
117 else if (type
== REG_TYPE_S
) {
118 if (p
->decl_s
& (1<<nr
))
121 p
->decl_s
|= (1<<nr
);
126 *(p
->decl
++) = (D0_DCL
| D0_DEST( reg
) | d0_flags
);
127 *(p
->decl
++) = D1_MBZ
;
128 *(p
->decl
++) = D2_MBZ
;
134 GLuint
i915_emit_arith( struct i915_fragment_program
*p
,
146 assert(GET_UREG_TYPE(dest
) != REG_TYPE_CONST
);
147 assert(dest
= UREG(GET_UREG_TYPE(dest
), GET_UREG_NR(dest
)));
149 if (GET_UREG_TYPE(src0
) == REG_TYPE_CONST
) c
[nr_const
++] = 0;
150 if (GET_UREG_TYPE(src1
) == REG_TYPE_CONST
) c
[nr_const
++] = 1;
151 if (GET_UREG_TYPE(src2
) == REG_TYPE_CONST
) c
[nr_const
++] = 2;
153 /* Recursively call this function to MOV additional const values
154 * into temporary registers. Use utemp registers for this -
155 * currently shouldn't be possible to run out, but keep an eye on
159 GLuint s
[3], first
, i
, old_utemp_flag
;
164 old_utemp_flag
= p
->utemp_flag
;
166 first
= GET_UREG_NR(s
[c
[0]]);
167 for (i
= 1 ; i
< nr_const
; i
++) {
168 if (GET_UREG_NR(s
[c
[i
]]) != first
) {
169 GLuint tmp
= i915_get_utemp(p
);
171 i915_emit_arith( p
, A0_MOV
, tmp
, A0_DEST_CHANNEL_ALL
, 0,
180 p
->utemp_flag
= old_utemp_flag
; /* restore */
188 *(p
->csr
++) = (A1_SRC0( src0
) |
190 *(p
->csr
++) = (A2_SRC1( src1
) |
197 GLuint
i915_emit_texld( struct i915_fragment_program
*p
,
204 assert(GET_UREG_TYPE(dest
) != REG_TYPE_CONST
);
205 assert(dest
= UREG(GET_UREG_TYPE(dest
), GET_UREG_NR(dest
)));
207 if (GET_UREG_TYPE(coord
) != REG_TYPE_T
) {
208 p
->nr_tex_indirect
++;
214 T0_SAMPLER( sampler
));
216 *(p
->csr
++) = T1_ADDRESS_REG( coord
);
217 *(p
->csr
++) = T2_MBZ
;
224 GLuint
i915_emit_const1f( struct i915_fragment_program
*p
, GLfloat c0
)
228 if (c0
== 0.0) return swizzle(UREG(REG_TYPE_R
, 0), ZERO
, ZERO
, ZERO
, ZERO
);
229 if (c0
== 1.0) return swizzle(UREG(REG_TYPE_R
, 0), ONE
, ONE
, ONE
, ONE
);
231 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
232 if (p
->constant_flags
[reg
] == I915_CONSTFLAG_PARAM
)
234 for (idx
= 0; idx
< 4; idx
++) {
235 if (!(p
->constant_flags
[reg
] & (1<<idx
)) ||
236 p
->constant
[reg
][idx
] == c0
) {
237 p
->constant
[reg
][idx
] = c0
;
238 p
->constant_flags
[reg
] |= 1<<idx
;
239 if (reg
+1 > p
->nr_constants
) p
->nr_constants
= reg
+1;
240 return swizzle(UREG(REG_TYPE_CONST
, reg
),idx
,ZERO
,ZERO
,ONE
);
245 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
250 GLuint
i915_emit_const2f( struct i915_fragment_program
*p
,
251 GLfloat c0
, GLfloat c1
)
255 if (c0
== 0.0) return swizzle(i915_emit_const1f(p
, c1
), ZERO
, X
, Z
, W
);
256 if (c0
== 1.0) return swizzle(i915_emit_const1f(p
, c1
), ONE
, X
, Z
, W
);
258 if (c1
== 0.0) return swizzle(i915_emit_const1f(p
, c0
), X
, ZERO
, Z
, W
);
259 if (c1
== 1.0) return swizzle(i915_emit_const1f(p
, c0
), X
, ONE
, Z
, W
);
261 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
262 if (p
->constant_flags
[reg
] == 0xf ||
263 p
->constant_flags
[reg
] == I915_CONSTFLAG_PARAM
)
265 for (idx
= 0; idx
< 3; idx
++) {
266 if (!(p
->constant_flags
[reg
] & (3<<idx
))) {
267 p
->constant
[reg
][idx
] = c0
;
268 p
->constant
[reg
][idx
+1] = c1
;
269 p
->constant_flags
[reg
] |= 3<<idx
;
270 if (reg
+1 > p
->nr_constants
) p
->nr_constants
= reg
+1;
271 return swizzle(UREG(REG_TYPE_CONST
, reg
),idx
,idx
+1,ZERO
,ONE
);
276 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
283 GLuint
i915_emit_const4f( struct i915_fragment_program
*p
,
284 GLfloat c0
, GLfloat c1
, GLfloat c2
, GLfloat c3
)
288 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
289 if (p
->constant_flags
[reg
] == 0xf &&
290 p
->constant
[reg
][0] == c0
&&
291 p
->constant
[reg
][1] == c1
&&
292 p
->constant
[reg
][2] == c2
&&
293 p
->constant
[reg
][3] == c3
) {
294 return UREG(REG_TYPE_CONST
, reg
);
296 else if (p
->constant_flags
[reg
] == 0) {
297 p
->constant
[reg
][0] = c0
;
298 p
->constant
[reg
][1] = c1
;
299 p
->constant
[reg
][2] = c2
;
300 p
->constant
[reg
][3] = c3
;
301 p
->constant_flags
[reg
] = 0xf;
302 if (reg
+1 > p
->nr_constants
) p
->nr_constants
= reg
+1;
303 return UREG(REG_TYPE_CONST
, reg
);
307 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
313 GLuint
i915_emit_const4fv( struct i915_fragment_program
*p
, const GLfloat
*c
)
315 return i915_emit_const4f( p
, c
[0], c
[1], c
[2], c
[3] );
319 GLuint
i915_emit_param4fv( struct i915_fragment_program
*p
,
320 const GLfloat
*values
)
324 for (i
= 0; i
< p
->nr_params
; i
++) {
325 if (p
->param
[i
].values
== values
)
326 return UREG(REG_TYPE_CONST
, p
->param
[i
].reg
);
330 for (reg
= 0; reg
< I915_MAX_CONSTANT
; reg
++) {
331 if (p
->constant_flags
[reg
] == 0) {
332 p
->constant_flags
[reg
] = I915_CONSTFLAG_PARAM
;
335 p
->param
[i
].values
= values
;
336 p
->param
[i
].reg
= reg
;
337 p
->params_uptodate
= 0;
339 if (reg
+1 > p
->nr_constants
) p
->nr_constants
= reg
+1;
340 return UREG(REG_TYPE_CONST
, reg
);
344 fprintf(stderr
, "%s: out of constants\n", __FUNCTION__
);
352 void i915_program_error( struct i915_fragment_program
*p
, const GLubyte
*msg
)
354 fprintf(stderr
, "%s\n", msg
);
358 void i915_init_program( i915ContextPtr i915
, struct i915_fragment_program
*p
)
360 GLcontext
*ctx
= &i915
->intel
.ctx
;
361 TNLcontext
*tnl
= TNL_CONTEXT( ctx
);
364 p
->params_uptodate
= 0;
368 p
->nr_tex_indirect
= 1; /* correct? */
374 memset( p
->constant_flags
, 0, sizeof(p
->constant_flags
) );
378 p
->decl
= p
->declarations
;
381 p
->temp_flag
= 0xffff000;
382 p
->utemp_flag
= ~0x7;
384 p
->depth_written
= 0;
387 p
->src_texture
= UREG_BAD
;
388 p
->src_previous
= UREG(REG_TYPE_T
, T_DIFFUSE
);
389 p
->last_tex_stage
= 0;
392 *(p
->decl
++) = _3DSTATE_PIXEL_SHADER_PROGRAM
;
396 void i915_fini_program( struct i915_fragment_program
*p
)
398 GLuint program_size
= p
->csr
- p
->program
;
399 GLuint decl_size
= p
->decl
- p
->declarations
;
401 if (p
->nr_tex_indirect
> I915_MAX_TEX_INDIRECT
)
402 i915_program_error(p
, "Exceeded max nr indirect texture lookups");
404 if (p
->nr_tex_insn
> I915_MAX_TEX_INSN
)
405 i915_program_error(p
, "Exceeded max TEX instructions");
407 if (p
->nr_alu_insn
> I915_MAX_ALU_INSN
)
408 i915_program_error(p
, "Exceeded max ALU instructions");
410 if (p
->nr_decl_insn
> I915_MAX_DECL_INSN
)
411 i915_program_error(p
, "Exceeded max DECL instructions");
413 p
->declarations
[0] |= program_size
+ decl_size
- 2;
416 void i915_upload_program( i915ContextPtr i915
, struct i915_fragment_program
*p
)
418 GLuint program_size
= p
->csr
- p
->program
;
419 GLuint decl_size
= p
->decl
- p
->declarations
;
421 FALLBACK( &i915
->intel
, I915_FALLBACK_PROGRAM
, p
->error
);
423 /* Could just go straight to the batchbuffer from here:
425 if (i915
->state
.ProgramSize
!= (program_size
+ decl_size
) ||
426 memcmp(i915
->state
.Program
+ decl_size
, p
->program
,
427 program_size
*sizeof(int)) != 0) {
428 I915_STATECHANGE( i915
, I915_UPLOAD_PROGRAM
);
429 memcpy(i915
->state
.Program
, p
->declarations
, decl_size
*sizeof(int));
430 memcpy(i915
->state
.Program
+ decl_size
, p
->program
,
431 program_size
*sizeof(int));
432 i915
->state
.ProgramSize
= decl_size
+ program_size
;
435 /* Always seemed to get a failure if I used memcmp() to
436 * shortcircuit this state upload. Needs further investigation?
438 if (p
->nr_constants
) {
439 GLuint nr
= p
->nr_constants
;
441 I915_ACTIVESTATE( i915
, I915_UPLOAD_CONSTANTS
, 1 );
442 I915_STATECHANGE( i915
, I915_UPLOAD_CONSTANTS
);
444 i915
->state
.Constant
[0] = _3DSTATE_PIXEL_SHADER_CONSTANTS
| ((nr
) * 4);
445 i915
->state
.Constant
[1] = (1<<(nr
-1)) | ((1<<(nr
-1))-1);
447 memcpy(&i915
->state
.Constant
[2], p
->constant
, 4*sizeof(int)*(nr
));
448 i915
->state
.ConstantSize
= 2 + (nr
) * 4;
452 for (i
= 0; i
< nr
; i
++) {
453 fprintf(stderr
, "const[%d]: %f %f %f %f\n", i
,
462 I915_ACTIVESTATE( i915
, I915_UPLOAD_CONSTANTS
, 0 );