2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
4 develop this 3D driver.
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **********************************************************************/
29 * Keith Whitwell <keith@tungstengraphics.com>
36 #include "brw_structs.h"
37 #include "brw_defines.h"
38 #include "shader/prog_instruction.h"
/* Pack four 2-bit channel selectors into one 8-bit swizzle value:
 * a lands in bits 0-1, b in bits 2-3, c in bits 4-5 and d in bits 6-7.
 */
#define BRW_SWIZZLE4(a,b,c,d) \
   (((a) << 0) | ((b) << 2) | ((c) << 4) | ((d) << 6))

/* Pull the 2-bit selector for channel idx (0..3) back out of a swizzle. */
#define BRW_GET_SWZ(swz, idx) \
   (((swz) >> ((idx) * 2)) & 0x3)

/* Commonly used swizzles.  NOOP and XYZW are the same identity mapping. */
#define BRW_SWIZZLE_NOOP   BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW   BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX   BRW_SWIZZLE4(0,0,0,0)
#define BRW_SWIZZLE_XYXY   BRW_SWIZZLE4(0,1,0,1)
/* Size of one register in bytes; byte_offset() uses it to convert between
 * a (nr, subnr) pair and a flat byte offset.
 */
#define REG_SIZE (8 * 4)
52 /* These aren't hardware structs, just something useful for us to pass around:
54 * Align1 operation has a lot of control over input ranges. Used in
55 * WM programs to implement shaders decomposed into "channel serial"
56 * or "structure of array" form:
63 GLuint subnr
:5; /* :1 in align16 */
64 GLuint negate
:1; /* source only */
65 GLuint abs
:1; /* source only */
66 GLuint vstride
:4; /* source only */
67 GLuint width
:3; /* src only, align1 only */
68 GLuint hstride
:2; /* align1 only */
69 GLuint address_mode
:1; /* relative addressing, hopefully! */
74 GLuint swizzle
:8; /* src only, align16 only */
75 GLuint writemask
:4; /* dest only, align16 only */
76 GLint indirect_offset
:10; /* relative addressing offset */
77 GLuint pad1
:10; /* two dwords total */
94 struct brw_glsl_label
;
/* Capacity of the push/pop instruction-state stack (stack[]). */
#define BRW_EU_MAX_INSN_STACK   5
/* Capacity of the instruction store (store[]). */
#define BRW_EU_MAX_INSN         1200
103 struct brw_instruction store
[BRW_EU_MAX_INSN
];
106 /* Allow clients to push/pop instruction state:
108 struct brw_instruction stack
[BRW_EU_MAX_INSN_STACK
];
109 struct brw_instruction
*current
;
112 GLboolean single_program_flow
;
113 struct brw_context
*brw
;
115 struct brw_glsl_label
*first_label
; /**< linked list of labels */
116 struct brw_glsl_call
*first_call
; /**< linked list of CALs */
121 brw_save_label(struct brw_compile
*c
, const char *name
, GLuint position
);
124 brw_save_call(struct brw_compile
*c
, const char *name
, GLuint call_pos
);
127 brw_resolve_cals(struct brw_compile
*c
);
131 static INLINE
int type_sz( GLuint type
)
134 case BRW_REGISTER_TYPE_UD
:
135 case BRW_REGISTER_TYPE_D
:
136 case BRW_REGISTER_TYPE_F
:
138 case BRW_REGISTER_TYPE_HF
:
139 case BRW_REGISTER_TYPE_UW
:
140 case BRW_REGISTER_TYPE_W
:
142 case BRW_REGISTER_TYPE_UB
:
143 case BRW_REGISTER_TYPE_B
:
151 * Construct a brw_reg.
152 * \param file one of the BRW_x_REGISTER_FILE values
153 * \param nr register number/index
154 * \param subnr register sub number
155 * \param type one of BRW_REGISTER_TYPE_x
156 * \param vstride one of BRW_VERTICAL_STRIDE_x
157 * \param width one of BRW_WIDTH_x
158 * \param hstride one of BRW_HORIZONTAL_STRIDE_x
159 * \param swizzle one of BRW_SWIZZLE_x
160 * \param writemask WRITEMASK_X/Y/Z/W bitfield
162 static INLINE
struct brw_reg
brw_reg( GLuint file
,
176 reg
.subnr
= subnr
* type_sz(type
);
179 reg
.vstride
= vstride
;
181 reg
.hstride
= hstride
;
182 reg
.address_mode
= BRW_ADDRESS_DIRECT
;
185 /* Could do better: If the reg is r5.3<0;1,0>, we probably want to
186 * set swizzle and writemask to W, as the lower bits of subnr will
187 * be lost when converted to align16. This is probably too much to
188 * keep track of as you'd want it adjusted by suboffset(), etc.
189 * Perhaps fix up when converting to align16?
191 reg
.dw1
.bits
.swizzle
= swizzle
;
192 reg
.dw1
.bits
.writemask
= writemask
;
193 reg
.dw1
.bits
.indirect_offset
= 0;
194 reg
.dw1
.bits
.pad1
= 0;
198 /** Construct float[16] register */
199 static INLINE
struct brw_reg
brw_vec16_reg( GLuint file
,
207 BRW_VERTICAL_STRIDE_16
,
209 BRW_HORIZONTAL_STRIDE_1
,
214 /** Construct float[8] register */
215 static INLINE
struct brw_reg
brw_vec8_reg( GLuint file
,
223 BRW_VERTICAL_STRIDE_8
,
225 BRW_HORIZONTAL_STRIDE_1
,
230 /** Construct float[4] register */
231 static INLINE
struct brw_reg
brw_vec4_reg( GLuint file
,
239 BRW_VERTICAL_STRIDE_4
,
241 BRW_HORIZONTAL_STRIDE_1
,
246 /** Construct float[2] register */
247 static INLINE
struct brw_reg
brw_vec2_reg( GLuint file
,
255 BRW_VERTICAL_STRIDE_2
,
257 BRW_HORIZONTAL_STRIDE_1
,
262 /** Construct float[1] register */
263 static INLINE
struct brw_reg
brw_vec1_reg( GLuint file
,
271 BRW_VERTICAL_STRIDE_0
,
273 BRW_HORIZONTAL_STRIDE_0
,
279 static INLINE
struct brw_reg
retype( struct brw_reg reg
,
286 static INLINE
struct brw_reg
suboffset( struct brw_reg reg
,
289 reg
.subnr
+= delta
* type_sz(reg
.type
);
294 static INLINE
struct brw_reg
offset( struct brw_reg reg
,
302 static INLINE
struct brw_reg
byte_offset( struct brw_reg reg
,
305 GLuint newoffset
= reg
.nr
* REG_SIZE
+ reg
.subnr
+ bytes
;
306 reg
.nr
= newoffset
/ REG_SIZE
;
307 reg
.subnr
= newoffset
% REG_SIZE
;
312 /** Construct unsigned word[16] register */
313 static INLINE
struct brw_reg
brw_uw16_reg( GLuint file
,
317 return suboffset(retype(brw_vec16_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
320 /** Construct unsigned word[8] register */
321 static INLINE
struct brw_reg
brw_uw8_reg( GLuint file
,
325 return suboffset(retype(brw_vec8_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
328 /** Construct unsigned word[1] register */
329 static INLINE
struct brw_reg
brw_uw1_reg( GLuint file
,
333 return suboffset(retype(brw_vec1_reg(file
, nr
, 0), BRW_REGISTER_TYPE_UW
), subnr
);
336 static INLINE
struct brw_reg
brw_imm_reg( GLuint type
)
338 return brw_reg( BRW_IMMEDIATE_VALUE
,
342 BRW_VERTICAL_STRIDE_0
,
344 BRW_HORIZONTAL_STRIDE_0
,
349 /** Construct float immediate register */
350 static INLINE
struct brw_reg
brw_imm_f( GLfloat f
)
352 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_F
);
357 /** Construct integer immediate register */
358 static INLINE
struct brw_reg
brw_imm_d( GLint d
)
360 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_D
);
365 /** Construct uint immediate register */
366 static INLINE
struct brw_reg
brw_imm_ud( GLuint ud
)
368 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_UD
);
373 /** Construct ushort immediate register */
374 static INLINE
struct brw_reg
brw_imm_uw( GLushort uw
)
376 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_UW
);
377 imm
.dw1
.ud
= uw
| (uw
<< 16);
381 /** Construct short immediate register */
382 static INLINE
struct brw_reg
brw_imm_w( GLshort w
)
384 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_W
);
385 imm
.dw1
.d
= w
| (w
<< 16);
389 /* brw_imm_b and brw_imm_ub aren't supported by hardware - the type
390 * numbers alias with _V and _VF below:
393 /** Construct vector of eight signed half-byte values */
394 static INLINE
struct brw_reg
brw_imm_v( GLuint v
)
396 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_V
);
397 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
398 imm
.width
= BRW_WIDTH_8
;
399 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
404 /** Construct vector of four 8-bit float values */
405 static INLINE
struct brw_reg
brw_imm_vf( GLuint v
)
407 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_VF
);
408 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
409 imm
.width
= BRW_WIDTH_4
;
410 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
/* Bit 7 of a VF element; presumably the sign bit of the 8-bit float
 * encoding -- confirm against the hardware documentation.
 */
#define VF_NEG (1 << 7)
419 static INLINE
struct brw_reg
brw_imm_vf4( GLuint v0
,
424 struct brw_reg imm
= brw_imm_reg(BRW_REGISTER_TYPE_VF
);
425 imm
.vstride
= BRW_VERTICAL_STRIDE_0
;
426 imm
.width
= BRW_WIDTH_4
;
427 imm
.hstride
= BRW_HORIZONTAL_STRIDE_1
;
428 imm
.dw1
.ud
= ((v0
<< 0) |
436 static INLINE
struct brw_reg
brw_address( struct brw_reg reg
)
438 return brw_imm_uw(reg
.nr
* REG_SIZE
+ reg
.subnr
);
441 /** Construct float[1] general-purpose register */
442 static INLINE
struct brw_reg
brw_vec1_grf( GLuint nr
, GLuint subnr
)
444 return brw_vec1_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
447 /** Construct float[2] general-purpose register */
448 static INLINE
struct brw_reg
brw_vec2_grf( GLuint nr
, GLuint subnr
)
450 return brw_vec2_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
453 /** Construct float[4] general-purpose register */
454 static INLINE
struct brw_reg
brw_vec4_grf( GLuint nr
, GLuint subnr
)
456 return brw_vec4_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
459 /** Construct float[8] general-purpose register */
460 static INLINE
struct brw_reg
brw_vec8_grf( GLuint nr
, GLuint subnr
)
462 return brw_vec8_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
466 static INLINE
struct brw_reg
brw_uw8_grf( GLuint nr
, GLuint subnr
)
468 return brw_uw8_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
471 static INLINE
struct brw_reg
brw_uw16_grf( GLuint nr
, GLuint subnr
)
473 return brw_uw16_reg(BRW_GENERAL_REGISTER_FILE
, nr
, subnr
);
477 /** Construct null register (usually used for setting condition codes) */
478 static INLINE
struct brw_reg
brw_null_reg( void )
480 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
485 static INLINE
struct brw_reg
brw_address_reg( GLuint subnr
)
487 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
492 /* If/else instructions break in align16 mode if writemask & swizzle
493 * aren't xyzw. This goes against the convention for other scalar
496 static INLINE
struct brw_reg
brw_ip_reg( void )
498 return brw_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
501 BRW_REGISTER_TYPE_UD
,
502 BRW_VERTICAL_STRIDE_4
, /* ? */
504 BRW_HORIZONTAL_STRIDE_0
,
505 BRW_SWIZZLE_XYZW
, /* NOTE! */
506 WRITEMASK_XYZW
); /* NOTE! */
509 static INLINE
struct brw_reg
brw_acc_reg( void )
511 return brw_vec8_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
517 static INLINE
struct brw_reg
brw_flag_reg( void )
519 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
525 static INLINE
struct brw_reg
brw_mask_reg( GLuint subnr
)
527 return brw_uw1_reg(BRW_ARCHITECTURE_REGISTER_FILE
,
532 static INLINE
struct brw_reg
brw_message_reg( GLuint nr
)
534 return brw_vec8_reg(BRW_MESSAGE_REGISTER_FILE
,
542 /* This is almost always called with a numeric constant argument, so
543 * make things easy to evaluate at compile time:
545 static INLINE GLuint
cvt( GLuint val
)
559 static INLINE
struct brw_reg
stride( struct brw_reg reg
,
564 reg
.vstride
= cvt(vstride
);
565 reg
.width
= cvt(width
) - 1;
566 reg
.hstride
= cvt(hstride
);
571 static INLINE
struct brw_reg
vec16( struct brw_reg reg
)
573 return stride(reg
, 16,16,1);
576 static INLINE
struct brw_reg
vec8( struct brw_reg reg
)
578 return stride(reg
, 8,8,1);
581 static INLINE
struct brw_reg
vec4( struct brw_reg reg
)
583 return stride(reg
, 4,4,1);
586 static INLINE
struct brw_reg
vec2( struct brw_reg reg
)
588 return stride(reg
, 2,2,1);
591 static INLINE
struct brw_reg
vec1( struct brw_reg reg
)
593 return stride(reg
, 0,1,0);
597 static INLINE
struct brw_reg
get_element( struct brw_reg reg
, GLuint elt
)
599 return vec1(suboffset(reg
, elt
));
602 static INLINE
struct brw_reg
get_element_ud( struct brw_reg reg
, GLuint elt
)
604 return vec1(suboffset(retype(reg
, BRW_REGISTER_TYPE_UD
), elt
));
608 static INLINE
struct brw_reg
brw_swizzle( struct brw_reg reg
,
614 reg
.dw1
.bits
.swizzle
= BRW_SWIZZLE4(BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, x
),
615 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, y
),
616 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, z
),
617 BRW_GET_SWZ(reg
.dw1
.bits
.swizzle
, w
));
622 static INLINE
struct brw_reg
brw_swizzle1( struct brw_reg reg
,
625 return brw_swizzle(reg
, x
, x
, x
, x
);
628 static INLINE
struct brw_reg
brw_writemask( struct brw_reg reg
,
631 reg
.dw1
.bits
.writemask
&= mask
;
635 static INLINE
struct brw_reg
brw_set_writemask( struct brw_reg reg
,
638 reg
.dw1
.bits
.writemask
= mask
;
642 static INLINE
struct brw_reg
negate( struct brw_reg reg
)
648 static INLINE
struct brw_reg
brw_abs( struct brw_reg reg
)
654 /***********************************************************************
656 static INLINE
struct brw_reg
brw_vec4_indirect( GLuint subnr
,
659 struct brw_reg reg
= brw_vec4_grf(0, 0);
661 reg
.address_mode
= BRW_ADDRESS_REGISTER_INDIRECT_REGISTER
;
662 reg
.dw1
.bits
.indirect_offset
= offset
;
666 static INLINE
struct brw_reg
brw_vec1_indirect( GLuint subnr
,
669 struct brw_reg reg
= brw_vec1_grf(0, 0);
671 reg
.address_mode
= BRW_ADDRESS_REGISTER_INDIRECT_REGISTER
;
672 reg
.dw1
.bits
.indirect_offset
= offset
;
676 static INLINE
struct brw_reg
deref_4f(struct brw_indirect ptr
, GLint offset
)
678 return brw_vec4_indirect(ptr
.addr_subnr
, ptr
.addr_offset
+ offset
);
681 static INLINE
struct brw_reg
deref_1f(struct brw_indirect ptr
, GLint offset
)
683 return brw_vec1_indirect(ptr
.addr_subnr
, ptr
.addr_offset
+ offset
);
686 static INLINE
struct brw_reg
deref_4b(struct brw_indirect ptr
, GLint offset
)
688 return retype(deref_4f(ptr
, offset
), BRW_REGISTER_TYPE_B
);
691 static INLINE
struct brw_reg
deref_1uw(struct brw_indirect ptr
, GLint offset
)
693 return retype(deref_1f(ptr
, offset
), BRW_REGISTER_TYPE_UW
);
696 static INLINE
struct brw_reg
deref_1d(struct brw_indirect ptr
, GLint offset
)
698 return retype(deref_1f(ptr
, offset
), BRW_REGISTER_TYPE_D
);
701 static INLINE
struct brw_reg
deref_1ud(struct brw_indirect ptr
, GLint offset
)
703 return retype(deref_1f(ptr
, offset
), BRW_REGISTER_TYPE_UD
);
706 static INLINE
struct brw_reg
get_addr_reg(struct brw_indirect ptr
)
708 return brw_address_reg(ptr
.addr_subnr
);
711 static INLINE
struct brw_indirect
brw_indirect_offset( struct brw_indirect ptr
, GLint offset
)
713 ptr
.addr_offset
+= offset
;
717 static INLINE
struct brw_indirect
brw_indirect( GLuint addr_subnr
, GLint offset
)
719 struct brw_indirect ptr
;
720 ptr
.addr_subnr
= addr_subnr
;
721 ptr
.addr_offset
= offset
;
726 static INLINE
struct brw_instruction
*current_insn( struct brw_compile
*p
)
728 return &p
->store
[p
->nr_insn
];
731 void brw_pop_insn_state( struct brw_compile
*p
);
732 void brw_push_insn_state( struct brw_compile
*p
);
733 void brw_set_mask_control( struct brw_compile
*p
, GLuint value
);
734 void brw_set_saturate( struct brw_compile
*p
, GLuint value
);
735 void brw_set_access_mode( struct brw_compile
*p
, GLuint access_mode
);
736 void brw_set_compression_control( struct brw_compile
*p
, GLboolean control
);
737 void brw_set_predicate_control_flag_value( struct brw_compile
*p
, GLuint value
);
738 void brw_set_predicate_control( struct brw_compile
*p
, GLuint pc
);
739 void brw_set_conditionalmod( struct brw_compile
*p
, GLuint conditional
);
741 void brw_init_compile( struct brw_context
*, struct brw_compile
*p
);
742 const GLuint
*brw_get_program( struct brw_compile
*p
, GLuint
*sz
);
745 /* Helpers for regular instructions:
748 struct brw_instruction *brw_##OP(struct brw_compile *p, \
749 struct brw_reg dest, \
750 struct brw_reg src0);
753 struct brw_instruction *brw_##OP(struct brw_compile *p, \
754 struct brw_reg dest, \
755 struct brw_reg src0, \
756 struct brw_reg src1);
789 /* Helpers for SEND instruction:
791 void brw_urb_WRITE(struct brw_compile
*p
,
798 GLuint response_length
,
800 GLboolean writes_complete
,
804 void brw_fb_WRITE(struct brw_compile
*p
,
808 GLuint binding_table_index
,
810 GLuint response_length
,
813 void brw_SAMPLE(struct brw_compile
*p
,
817 GLuint binding_table_index
,
821 GLuint response_length
,
825 void brw_math_16( struct brw_compile
*p
,
833 void brw_math( struct brw_compile
*p
,
842 void brw_dp_READ_16( struct brw_compile
*p
,
845 GLuint scratch_offset
);
847 void brw_dp_WRITE_16( struct brw_compile
*p
,
850 GLuint scratch_offset
);
852 /* If/else/endif. Works by manipulating the execution flags on each
855 struct brw_instruction
*brw_IF(struct brw_compile
*p
,
856 GLuint execute_size
);
858 struct brw_instruction
*brw_ELSE(struct brw_compile
*p
,
859 struct brw_instruction
*if_insn
);
861 void brw_ENDIF(struct brw_compile
*p
,
862 struct brw_instruction
*if_or_else_insn
);
867 struct brw_instruction
*brw_DO(struct brw_compile
*p
,
868 GLuint execute_size
);
870 struct brw_instruction
*brw_WHILE(struct brw_compile
*p
,
871 struct brw_instruction
*patch_insn
);
873 struct brw_instruction
*brw_BREAK(struct brw_compile
*p
);
874 struct brw_instruction
*brw_CONT(struct brw_compile
*p
);
877 void brw_land_fwd_jump(struct brw_compile
*p
,
878 struct brw_instruction
*jmp_insn
);
882 void brw_NOP(struct brw_compile
*p
);
884 /* Special case: there is never a destination, execution size will be
887 void brw_CMP(struct brw_compile
*p
,
891 struct brw_reg src1
);
/* Debug helper taking a register by value; presumably prints a readable
 * description of it (definition not visible here).
 */
void brw_print_reg(struct brw_reg reg);
896 /***********************************************************************
900 void brw_copy_indirect_to_indirect(struct brw_compile
*p
,
901 struct brw_indirect dst_ptr
,
902 struct brw_indirect src_ptr
,
905 void brw_copy_from_indirect(struct brw_compile
*p
,
907 struct brw_indirect ptr
,
910 void brw_copy4(struct brw_compile
*p
,
915 void brw_copy8(struct brw_compile
*p
,
920 void brw_math_invert( struct brw_compile
*p
,
/* Presumably encodes reg as the second source operand of insn; only the
 * declaration is visible here.
 */
void brw_set_src1(struct brw_instruction *insn,
                  struct brw_reg reg);