/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
36 #include "brw_structs.h"
37 #include "brw_defines.h"
38 #include "shader/program.h"
/* A swizzle packs four 2-bit channel selectors into one byte:
 * selector 'a' occupies bits 0-1, 'b' bits 2-3, 'c' bits 4-5, 'd' bits 6-7.
 */
#define BRW_SWIZZLE4(a,b,c,d) (((a)<<0) | ((b)<<2) | ((c)<<4) | ((d)<<6))

/* Extract the 2-bit selector stored at position idx (0..3) of swz. */
#define BRW_GET_SWZ(swz, idx) (((swz) >> ((idx)*2)) & 0x3)

#define BRW_SWIZZLE_NOOP      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW      BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX      BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_XYXY      BRW_SWIZZLE4(0,1,0,1)

/* Used below to convert between a register number and a byte offset. */
#define REG_SIZE (8*4)
/* These aren't hardware structs, just something useful for us to pass around:
 *
 * Align1 operation has a lot of control over input ranges.  Used in
 * WM programs to implement shaders decomposed into "channel serial"
 * or "structure of array" form:
 */
63 GLuint subnr
:5; /* :1 in align16 */
64 GLuint negate
:1; /* source only */
65 GLuint abs
:1; /* source only */
66 GLuint vstride
:4; /* source only */
67 GLuint width
:3; /* src only, align1 only */
68 GLuint hstride
:2; /* src only, align1 only */
69 GLuint address_mode
:1; /* relative addressing, hopefully! */
74 GLuint swizzle
:8; /* src only, align16 only */
75 GLuint writemask
:4; /* dest only, align16 only */
76 GLint indirect_offset
:10; /* relative addressing offset */
77 GLuint pad1
:10; /* two dwords total */
/* Capacity limits used for the instruction-state stack and the
 * instruction store arrays declared below.
 */
#define BRW_EU_MAX_INSN_STACK 5
#define BRW_EU_MAX_INSN 1200
98 struct brw_instruction store
[BRW_EU_MAX_INSN
];
101 /* Allow clients to push/pop instruction state:
103 struct brw_instruction stack
[BRW_EU_MAX_INSN_STACK
];
104 struct brw_instruction
*current
;
107 GLboolean single_program_flow
;
112 static __inline
int type_sz( GLuint type
)
115 case BRW_REGISTER_TYPE_UD
:
116 case BRW_REGISTER_TYPE_D
:
117 case BRW_REGISTER_TYPE_F
:
119 case BRW_REGISTER_TYPE_HF
:
120 case BRW_REGISTER_TYPE_UW
:
121 case BRW_REGISTER_TYPE_W
:
123 case BRW_REGISTER_TYPE_UB
:
124 case BRW_REGISTER_TYPE_B
:
131 static __inline
struct brw_reg
brw_reg( GLuint file
,
146 reg
.subnr
= subnr
* type_sz(type
);
149 reg
.vstride
= vstride
;
151 reg
.hstride
= hstride
;
152 reg
.address_mode
= BRW_ADDRESS_DIRECT
;
/* Could do better: If the reg is r5.3<0;1,0>, we probably want to
 * set swizzle and writemask to W, as the lower bits of subnr will
 * be lost when converted to align16.  This is probably too much to
 * keep track of as you'd want it adjusted by suboffset(), etc.
 * Perhaps fix up when converting to align16?
 */
161 reg
.dw1
.bits
.swizzle
= swizzle
;
162 reg
.dw1
.bits
.writemask
= writemask
;
163 reg
.dw1
.bits
.indirect_offset
= 0;
164 reg
.dw1
.bits
.pad1
= 0;
168 static __inline
struct brw_reg
brw_vec16_reg( GLuint file
,
176 BRW_VERTICAL_STRIDE_16
,
178 BRW_HORIZONTAL_STRIDE_1
,
183 static __inline
struct brw_reg
brw_vec8_reg( GLuint file
,
191 BRW_VERTICAL_STRIDE_8
,
193 BRW_HORIZONTAL_STRIDE_1
,
199 static __inline
struct brw_reg
brw_vec4_reg( GLuint file
,
207 BRW_VERTICAL_STRIDE_4
,
209 BRW_HORIZONTAL_STRIDE_1
,
215 static __inline
struct brw_reg
brw_vec2_reg( GLuint file
,
223 BRW_VERTICAL_STRIDE_2
,
225 BRW_HORIZONTAL_STRIDE_1
,
230 static __inline
struct brw_reg
brw_vec1_reg( GLuint file
,
238 BRW_VERTICAL_STRIDE_0
,
240 BRW_HORIZONTAL_STRIDE_0
,
246 static __inline
struct brw_reg
retype( struct brw_reg reg
,
253 static __inline
struct brw_reg
suboffset( struct brw_reg reg
,
256 reg
.subnr
+= delta
* type_sz(reg
.type
);
261 static __inline
struct brw_reg
offset( struct brw_reg reg
,
269 static __inline
struct brw_reg
byte_offset( struct brw_reg reg
,
272 GLuint newoffset
= reg
.nr
* REG_SIZE
+ reg
.subnr
+ bytes
;
273 reg
.nr
= newoffset
/ REG_SIZE
;
274 reg
.subnr
= newoffset
% REG_SIZE
;
279 static __inline
struct brw_reg
brw_uw16_reg( GLuint file
,
283 return suboffset(retype(brw_vec16_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
286 static __inline
struct brw_reg
brw_uw8_reg( GLuint file
,
290 return suboffset(retype(brw_vec8_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
293 static __inline
struct brw_reg
brw_uw1_reg( GLuint file
,
297 return suboffset(retype(brw_vec1_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
300 static __inline
struct brw_reg
brw_imm_reg( GLuint type
)
302 return brw_reg( BRW_IMMEDIATE_VALUE
,
306 BRW_VERTICAL_STRIDE_0
,
308 BRW_HORIZONTAL_STRIDE_0
,
313 static __inline
struct brw_reg
brw_imm_f( GLfloat f
)
315 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_F
);
320 static __inline
struct brw_reg
brw_imm_d( GLint d
)
322 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_D
);
327 static __inline
struct brw_reg
brw_imm_ud( GLuint ud
)
329 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_UD
);
334 static __inline
struct brw_reg
brw_imm_uw( GLushort uw
)
336 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_UW
);
341 static __inline
struct brw_reg
brw_imm_w( GLshort w
)
343 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_W
);
/* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
 * numbers alias with _V and _VF below:
 */
/* Vector of eight signed half-byte values:
 */
354 static __inline
struct brw_reg
brw_imm_v( GLuint v
)
356 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_V
);
357 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
358 imm
.width
= BRW_WIDTH_8
;
359 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
/* Vector of four 8-bit float values:
 */
366 static __inline
struct brw_reg
brw_imm_vf( GLuint v
)
368 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_VF
);
369 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
370 imm
.width
= BRW_WIDTH_4
;
371 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
/* Bit 7 of a byte; NOTE(review): presumably the sign bit of one packed
 * 8-bit float passed to brw_imm_vf4() - confirm against its callers.
 */
#define VF_NEG  (1<<7)
380 static __inline
struct brw_reg
brw_imm_vf4( GLuint v0
,
385 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_VF
);
386 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
387 imm
.width
= BRW_WIDTH_4
;
388 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
389 imm
.dw1
.ud
= ((v0
<< 0) |
397 static __inline
struct brw_reg
brw_address( struct brw_reg reg
)
399 return brw_imm_uw(reg
.nr
* REG_SIZE
+ reg
.subnr
);
403 static __inline
struct brw_reg
brw_vec1_grf( GLuint nr
,
406 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
409 static __inline
struct brw_reg
brw_vec8_grf( GLuint nr
,
412 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
415 static __inline
struct brw_reg
brw_vec4_grf( GLuint nr
,
418 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
422 static __inline
struct brw_reg
brw_vec2_grf( GLuint nr
,
425 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
428 static __inline
struct brw_reg
brw_uw8_grf( GLuint nr
,
431 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
434 static __inline
struct brw_reg
brw_null_reg( void )
436 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
441 static __inline
struct brw_reg
brw_address_reg( GLuint subnr
)
443 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
448 /* If/else instructions break in align16 mode if writemask & swizzle
449 * aren't xyzw. This goes against the convention for other scalar
452 static __inline
struct brw_reg
brw_ip_reg( void )
454 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
457 BRW_REGISTER_TYPE_UD
,
458 BRW_VERTICAL_STRIDE_4
, /* ? */
460 BRW_HORIZONTAL_STRIDE_0
,
461 BRW_SWIZZLE_XYZW
, /* NOTE! */
462 WRITEMASK_XYZW
); /* NOTE! */
465 static __inline
struct brw_reg
brw_acc_reg( void )
467 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
473 static __inline
struct brw_reg
brw_flag_reg( void )
475 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
481 static __inline
struct brw_reg
brw_mask_reg( GLuint subnr
)
483 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
488 static __inline
struct brw_reg
brw_message_reg( GLuint nr
)
490 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
,
/* This is almost always called with a numeric constant argument, so
 * make things easy to evaluate at compile time:
 */
501 static __inline GLuint
cvt( GLuint val
)
515 static __inline
struct brw_reg
stride( struct brw_reg reg
,
521 reg
.vstride
= cvt(vstride
);
522 reg
.width
= cvt(width
) - 1;
523 reg
.hstride
= cvt(hstride
);
527 static __inline
struct brw_reg
vec16( struct brw_reg reg
)
529 return stride(reg
, 16,16,1);
532 static __inline
struct brw_reg
vec8( struct brw_reg reg
)
534 return stride(reg
, 8,8,1);
537 static __inline
struct brw_reg
vec4( struct brw_reg reg
)
539 return stride(reg
, 4,4,1);
542 static __inline
struct brw_reg
vec2( struct brw_reg reg
)
544 return stride(reg
, 2,2,1);
547 static __inline
struct brw_reg
vec1( struct brw_reg reg
)
549 return stride(reg
, 0,1,0);
552 static __inline
struct brw_reg
get_element( struct brw_reg reg
, GLuint elt
)
554 return vec1(suboffset(reg
, elt
));
557 static __inline
struct brw_reg
get_element_ud( struct brw_reg reg
, GLuint elt
)
559 return vec1(suboffset(retype(reg
, BRW_REGISTER_TYPE_UD
), elt
));
563 static __inline
struct brw_reg
brw_swizzle( struct brw_reg reg
,
569 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, x
),
570 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, y
),
571 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, z
),
572 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, w
));
577 static __inline
struct brw_reg
brw_swizzle1( struct brw_reg reg
,
580 return brw_swizzle(reg
, x
, x
, x
, x
);
583 static __inline
struct brw_reg
brw_writemask( struct brw_reg reg
,
586 reg
.dw1
.bits
.writemask
&= mask
;
590 static __inline
struct brw_reg
brw_set_writemask( struct brw_reg reg
,
593 reg
.dw1
.bits
.writemask
= mask
;
597 static __inline
struct brw_reg
negate( struct brw_reg reg
)
603 static __inline
struct brw_reg
brw_abs( struct brw_reg reg
)
609 /***********************************************************************
611 static __inline
struct brw_reg
brw_vec4_indirect( GLuint subnr
,
614 struct brw_reg reg
= brw_vec4_grf(0, 0);
616 reg
.address_mode
= BRW_ADDRESS_REGISTER_INDIRECT_REGISTER
;
617 reg
.dw1
.bits
.indirect_offset
= offset
;
621 static __inline
struct brw_reg
brw_vec1_indirect( GLuint subnr
,
624 struct brw_reg reg
= brw_vec1_grf(0, 0);
626 reg
.address_mode
= BRW_ADDRESS_REGISTER_INDIRECT_REGISTER
;
627 reg
.dw1
.bits
.indirect_offset
= offset
;
631 static __inline
struct brw_reg
deref_4f(struct brw_indirect ptr
, GLint offset
)
633 return brw_vec4_indirect(ptr
.addr_subnr
, ptr
.addr_offset
+ offset
);
636 static __inline
struct brw_reg
deref_1f(struct brw_indirect ptr
, GLint offset
)
638 return brw_vec1_indirect(ptr
.addr_subnr
, ptr
.addr_offset
+ offset
);
641 static __inline
struct brw_reg
deref_4b(struct brw_indirect ptr
, GLint offset
)
643 return retype(deref_4f(ptr
, offset
), BRW_REGISTER_TYPE_B
);
646 static __inline
struct brw_reg
deref_1uw(struct brw_indirect ptr
, GLint offset
)
648 return retype(deref_1f(ptr
, offset
), BRW_REGISTER_TYPE_UW
);
651 static __inline
struct brw_reg
get_addr_reg(struct brw_indirect ptr
)
653 return brw_address_reg(ptr
.addr_subnr
);
656 static __inline
struct brw_indirect
brw_indirect_offset( struct brw_indirect ptr
, GLint offset
)
658 ptr
.addr_offset
+= offset
;
662 static __inline
struct brw_indirect
brw_indirect( GLuint addr_subnr
, GLint offset
)
664 struct brw_indirect ptr
;
665 ptr
.addr_subnr
= addr_subnr
;
666 ptr
.addr_offset
= offset
;
673 void brw_pop_insn_state( struct brw_compile
*p
);
674 void brw_push_insn_state( struct brw_compile
*p
);
675 void brw_set_mask_control( struct brw_compile
*p
, GLuint value
);
676 void brw_set_saturate( struct brw_compile
*p
, GLuint value
);
677 void brw_set_access_mode( struct brw_compile
*p
, GLuint access_mode
);
678 void brw_set_compression_control( struct brw_compile
*p
, GLboolean control
);
679 void brw_set_predicate_control_flag_value( struct brw_compile
*p
, GLuint value
);
680 void brw_set_predicate_control( struct brw_compile
*p
, GLuint pc
);
681 void brw_set_conditionalmod( struct brw_compile
*p
, GLuint conditional
);
683 void brw_init_compile( struct brw_compile
*p
);
684 const GLuint
*brw_get_program( struct brw_compile
*p
, GLuint
*sz
);
/* Helpers for regular instructions:
 */
690 struct brw_instruction *brw_##OP(struct brw_compile *p, \
691 struct brw_reg dest, \
692 struct brw_reg src0);
695 struct brw_instruction *brw_##OP(struct brw_compile *p, \
696 struct brw_reg dest, \
697 struct brw_reg src0, \
698 struct brw_reg src1);
/* Helpers for SEND instruction:
 */
732 void brw_urb_WRITE(struct brw_compile
*p
,
739 GLuint response_length
,
741 GLboolean writes_complete
,
745 void brw_fb_WRITE(struct brw_compile
*p
,
749 GLuint binding_table_index
,
751 GLuint response_length
,
754 void brw_SAMPLE(struct brw_compile
*p
,
758 GLuint binding_table_index
,
762 GLuint response_length
,
766 void brw_math_16( struct brw_compile
*p
,
774 void brw_math( struct brw_compile
*p
,
783 void brw_dp_READ_16( struct brw_compile
*p
,
786 GLuint scratch_offset
);
788 void brw_dp_WRITE_16( struct brw_compile
*p
,
791 GLuint scratch_offset
);
793 /* If/else/endif. Works by manipulating the execution flags on each
796 struct brw_instruction
*brw_IF(struct brw_compile
*p
,
797 GLuint execute_size
);
799 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
800 struct brw_instruction
*if_insn
);
802 void brw_ENDIF(struct brw_compile
*p
,
803 struct brw_instruction
*if_or_else_insn
);
808 struct brw_instruction
*brw_DO(struct brw_compile
*p
,
809 GLuint execute_size
);
811 void brw_WHILE(struct brw_compile
*p
,
812 struct brw_instruction
*patch_insn
);
816 void brw_land_fwd_jump(struct brw_compile
*p
,
817 struct brw_instruction
*jmp_insn
);
821 void brw_NOP(struct brw_compile
*p
);
823 /* Special case: there is never a destination, execution size will be
826 void brw_CMP(struct brw_compile
*p
,
830 struct brw_reg src1
);
/* NOTE(review): presumably a debugging aid that prints a description
 * of reg - output format not visible here.
 */
void brw_print_reg( struct brw_reg reg );
835 /***********************************************************************
839 void brw_copy_indirect_to_indirect(struct brw_compile
*p
,
840 struct brw_indirect dst_ptr
,
841 struct brw_indirect src_ptr
,
844 void brw_copy_from_indirect(struct brw_compile
*p
,
846 struct brw_indirect ptr
,
849 void brw_copy4(struct brw_compile
*p
,
854 void brw_copy8(struct brw_compile
*p
,
859 void brw_math_invert( struct brw_compile
*p
,