2 /* FF is big and ugly so feel free to write lines as long as you like.
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
27 #define DBG_CHANNEL DBG_FF
29 #define NINE_FF_NUM_VS_CONST 196
30 #define NINE_FF_NUM_PS_CONST 24
41 uint32_t position_t
: 1;
42 uint32_t lighting
: 1;
43 uint32_t darkness
: 1; /* lighting enabled but no active lights */
44 uint32_t localviewer
: 1;
45 uint32_t vertexpointsize
: 1;
46 uint32_t pointscale
: 1;
47 uint32_t vertexblend
: 3;
48 uint32_t vertexblend_indexed
: 1;
49 uint32_t vertextween
: 1;
50 uint32_t mtl_diffuse
: 2; /* 0 = material, 1 = color1, 2 = color2 */
51 uint32_t mtl_ambient
: 2;
52 uint32_t mtl_specular
: 2;
53 uint32_t mtl_emissive
: 2;
54 uint32_t fog_mode
: 2;
55 uint32_t fog_range
: 1;
56 uint32_t color0in_one
: 1;
57 uint32_t color1in_zero
: 1;
58 uint32_t has_normal
: 1;
60 uint32_t normalizenormals
: 1;
63 uint32_t tc_dim_input
: 16; /* 8 * 2 bits */
65 uint32_t tc_dim_output
: 24; /* 8 * 3 bits */
67 uint32_t tc_gen
: 24; /* 8 * 3 bits */
73 uint64_t value64
[3]; /* don't forget to resize VertexShader9.ff_key */
78 /* Texture stage state:
80 * COLOROP D3DTOP 5 bit
81 * ALPHAOP D3DTOP 5 bit
82 * COLORARG0 D3DTA 3 bit
83 * COLORARG1 D3DTA 3 bit
84 * COLORARG2 D3DTA 3 bit
85 * ALPHAARG0 D3DTA 3 bit
86 * ALPHAARG1 D3DTA 3 bit
87 * ALPHAARG2 D3DTA 3 bit
88 * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1)
89 * TEXCOORDINDEX 0 - 7 3 bit
90 * ===========================
100 uint32_t colorarg0
: 3;
101 uint32_t colorarg1
: 3;
102 uint32_t colorarg2
: 3;
103 uint32_t alphaarg0
: 3;
104 uint32_t alphaarg1
: 3;
105 uint32_t alphaarg2
: 3;
106 uint32_t resultarg
: 1; /* CURRENT:0 or TEMP:1 */
107 uint32_t textarget
: 2; /* 1D/2D/3D/CUBE */
109 /* that's 32 bit exactly */
111 uint32_t projected
: 16;
112 uint32_t fog
: 1; /* for vFog coming from VS */
113 uint32_t fog_mode
: 2;
114 uint32_t fog_source
: 1; /* 0: Z, 1: W */
115 uint32_t specular
: 1;
116 uint32_t pad1
: 11; /* 9 32-bit words with this */
117 uint8_t colorarg_b4
[3];
118 uint8_t colorarg_b5
[3];
119 uint8_t alphaarg_b4
[3]; /* 11 32-bit words plus a byte */
122 uint64_t value64
[6]; /* don't forget to resize PixelShader9.ff_key */
123 uint32_t value32
[12];
127 static unsigned nine_ff_vs_key_hash(void *key
)
129 struct nine_ff_vs_key
*vs
= key
;
131 uint32_t hash
= vs
->value32
[0];
132 for (i
= 1; i
< ARRAY_SIZE(vs
->value32
); ++i
)
133 hash
^= vs
->value32
[i
];
136 static int nine_ff_vs_key_comp(void *key1
, void *key2
)
138 struct nine_ff_vs_key
*a
= (struct nine_ff_vs_key
*)key1
;
139 struct nine_ff_vs_key
*b
= (struct nine_ff_vs_key
*)key2
;
141 return memcmp(a
->value64
, b
->value64
, sizeof(a
->value64
));
143 static unsigned nine_ff_ps_key_hash(void *key
)
145 struct nine_ff_ps_key
*ps
= key
;
147 uint32_t hash
= ps
->value32
[0];
148 for (i
= 1; i
< ARRAY_SIZE(ps
->value32
); ++i
)
149 hash
^= ps
->value32
[i
];
152 static int nine_ff_ps_key_comp(void *key1
, void *key2
)
154 struct nine_ff_ps_key
*a
= (struct nine_ff_ps_key
*)key1
;
155 struct nine_ff_ps_key
*b
= (struct nine_ff_ps_key
*)key2
;
157 return memcmp(a
->value64
, b
->value64
, sizeof(a
->value64
));
159 static unsigned nine_ff_fvf_key_hash(void *key
)
161 return *(DWORD
*)key
;
163 static int nine_ff_fvf_key_comp(void *key1
, void *key2
)
165 return *(DWORD
*)key1
!= *(DWORD
*)key2
;
/* Forward declarations; the definitions are not in this part of the file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
171 static void nine_ureg_tgsi_dump(struct ureg_program
*ureg
, boolean override
)
173 if (debug_get_bool_option("NINE_FF_DUMP", FALSE
) || override
) {
174 const struct tgsi_token
*toks
= ureg_get_tokens(ureg
, NULL
);
176 ureg_free_tokens(toks
);
/* Broadcast one component of a destination register, read back as a source. */
#define _X(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r)    ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Broadcast one component of a source register. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/*
 * Constant i addressed indirectly through AL.x.
 * AL should contain base address of lights table.
 * Both `ureg` and `AL` must be in scope where the macro is used.
 */
#define LIGHT_CONST(i) ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Material constant i; the material block starts at constant 19. */
#define MATERIAL_CONST(i) ureg_DECL_constant(ureg, 19 + (i))

/* Plain constant n of the program `ureg` in scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)
201 /* VS FF constants layout:
203 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
204 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
205 * CONST[ 8..11] D3DTS_PROJECTION
206 * CONST[12..15] D3DTS_VIEW^(-1)
207 * CONST[16..18] Normal matrix
209 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient
210 * CONST[20] MATERIAL.Diffuse
211 * CONST[21] MATERIAL.Ambient
212 * CONST[22] MATERIAL.Specular
213 * CONST[23].x___ MATERIAL.Power
214 * CONST[24] MATERIAL.Emissive
215 * CONST[25] RS.Ambient
217 * CONST[26].x___ RS.PointSizeMin
218 * CONST[26]._y__ RS.PointSizeMax
219 * CONST[26].__z_ RS.PointSize
220 * CONST[26].___w RS.PointScaleA
221 * CONST[27].x___ RS.PointScaleB
222 * CONST[27]._y__ RS.PointScaleC
224 * CONST[28].x___ RS.FogEnd
225 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
226 * CONST[28].__z_ RS.FogDensity
228 * CONST[30].x___ TWEENFACTOR
230 * CONST[32].x___ LIGHT[0].Type
231 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
232 * CONST[33] LIGHT[0].Diffuse
233 * CONST[34] LIGHT[0].Specular
234 * CONST[35] LIGHT[0].Ambient
235 * CONST[36].xyz_ LIGHT[0].Position
236 * CONST[36].___w LIGHT[0].Range
237 * CONST[37].xyz_ LIGHT[0].Direction
238 * CONST[37].___w LIGHT[0].Falloff
239 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
240 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
241 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
242 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
243 * CONST[39].___w 1 if this is the last active light, 0 if not
251 * NOTE: no lighting code is generated if there are no active lights
253 * CONST[100].x___ Viewport 2/width
254 * CONST[100]._y__ Viewport 2/height
255 * CONST[100].__z_ Viewport 1/(zmax - zmin)
256 * CONST[100].___w Viewport width
257 * CONST[101].x___ Viewport x0
258 * CONST[101]._y__ Viewport y0
259 * CONST[101].__z_ Viewport z0
261 * CONST[128..131] D3DTS_TEXTURE0
262 * CONST[132..135] D3DTS_TEXTURE1
263 * CONST[136..139] D3DTS_TEXTURE2
264 * CONST[140..143] D3DTS_TEXTURE3
265 * CONST[144..147] D3DTS_TEXTURE4
266 * CONST[148..151] D3DTS_TEXTURE5
267 * CONST[152..155] D3DTS_TEXTURE6
268 * CONST[156..159] D3DTS_TEXTURE7
270 * CONST[160..163] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
271 * CONST[164..167] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
273 * CONST[192..195] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
277 struct ureg_program
*ureg
;
278 const struct nine_ff_vs_key
*key
;
280 uint16_t input
[PIPE_MAX_ATTRIBS
];
283 struct ureg_src aVtx
;
284 struct ureg_src aNrm
;
285 struct ureg_src aCol
[2];
286 struct ureg_src aTex
[8];
287 struct ureg_src aPsz
;
288 struct ureg_src aInd
;
289 struct ureg_src aWgt
;
291 struct ureg_src aVtx1
; /* tweening */
292 struct ureg_src aNrm1
;
294 struct ureg_src mtlA
;
295 struct ureg_src mtlD
;
296 struct ureg_src mtlS
;
297 struct ureg_src mtlE
;
300 static inline unsigned
301 get_texcoord_sn(struct pipe_screen
*screen
)
303 if (screen
->get_param(screen
, PIPE_CAP_TGSI_TEXCOORD
))
304 return TGSI_SEMANTIC_TEXCOORD
;
305 return TGSI_SEMANTIC_GENERIC
;
308 static inline struct ureg_src
309 build_vs_add_input(struct vs_build_ctx
*vs
, uint16_t ndecl
)
311 const unsigned i
= vs
->num_inputs
++;
312 assert(i
< PIPE_MAX_ATTRIBS
);
313 vs
->input
[i
] = ndecl
;
314 return ureg_DECL_vs_input(vs
->ureg
, i
);
317 /* NOTE: dst may alias src */
319 ureg_normalize3(struct ureg_program
*ureg
,
320 struct ureg_dst dst
, struct ureg_src src
)
322 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
323 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
325 ureg_DP3(ureg
, tmp_x
, src
, src
);
326 ureg_RSQ(ureg
, tmp_x
, _X(tmp
));
327 ureg_MUL(ureg
, dst
, src
, _X(tmp
));
328 ureg_release_temporary(ureg
, tmp
);
332 nine_ff_build_vs(struct NineDevice9
*device
, struct vs_build_ctx
*vs
)
334 const struct nine_ff_vs_key
*key
= vs
->key
;
335 struct ureg_program
*ureg
= ureg_create(PIPE_SHADER_VERTEX
);
336 struct ureg_dst oPos
, oCol
[2], oPsz
, oFog
;
339 unsigned label
[32], l
= 0;
340 boolean need_aNrm
= key
->lighting
|| key
->passthrough
& (1 << NINE_DECLUSAGE_NORMAL
);
341 boolean has_aNrm
= need_aNrm
&& key
->has_normal
;
342 boolean need_aVtx
= key
->lighting
|| key
->fog_mode
|| key
->pointscale
|| key
->ucp
;
343 const unsigned texcoord_sn
= get_texcoord_sn(device
->screen
);
347 /* Check which inputs we should transform. */
348 for (i
= 0; i
< 8 * 3; i
+= 3) {
349 switch ((key
->tc_gen
>> i
) & 0x7) {
350 case NINED3DTSS_TCI_CAMERASPACENORMAL
:
353 case NINED3DTSS_TCI_CAMERASPACEPOSITION
:
356 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR
:
357 need_aVtx
= need_aNrm
= TRUE
;
359 case NINED3DTSS_TCI_SPHEREMAP
:
360 need_aVtx
= need_aNrm
= TRUE
;
367 /* Declare and record used inputs (needed for linkage with vertex format):
368 * (texture coordinates handled later)
370 vs
->aVtx
= build_vs_add_input(vs
,
371 key
->position_t
? NINE_DECLUSAGE_POSITIONT
: NINE_DECLUSAGE_POSITION
);
373 vs
->aNrm
= ureg_imm1f(ureg
, 0.0f
);
375 vs
->aNrm
= build_vs_add_input(vs
, NINE_DECLUSAGE_NORMAL
);
377 vs
->aCol
[0] = ureg_imm1f(ureg
, 1.0f
);
378 vs
->aCol
[1] = ureg_imm1f(ureg
, 0.0f
);
380 if (key
->lighting
|| key
->darkness
) {
381 const unsigned mask
= key
->mtl_diffuse
| key
->mtl_specular
|
382 key
->mtl_ambient
| key
->mtl_emissive
;
383 if ((mask
& 0x1) && !key
->color0in_one
)
384 vs
->aCol
[0] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 0));
385 if ((mask
& 0x2) && !key
->color1in_zero
)
386 vs
->aCol
[1] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 1));
388 vs
->mtlD
= MATERIAL_CONST(1);
389 vs
->mtlA
= MATERIAL_CONST(2);
390 vs
->mtlS
= MATERIAL_CONST(3);
391 vs
->mtlE
= MATERIAL_CONST(5);
392 if (key
->mtl_diffuse
== 1) vs
->mtlD
= vs
->aCol
[0]; else
393 if (key
->mtl_diffuse
== 2) vs
->mtlD
= vs
->aCol
[1];
394 if (key
->mtl_ambient
== 1) vs
->mtlA
= vs
->aCol
[0]; else
395 if (key
->mtl_ambient
== 2) vs
->mtlA
= vs
->aCol
[1];
396 if (key
->mtl_specular
== 1) vs
->mtlS
= vs
->aCol
[0]; else
397 if (key
->mtl_specular
== 2) vs
->mtlS
= vs
->aCol
[1];
398 if (key
->mtl_emissive
== 1) vs
->mtlE
= vs
->aCol
[0]; else
399 if (key
->mtl_emissive
== 2) vs
->mtlE
= vs
->aCol
[1];
401 if (!key
->color0in_one
) vs
->aCol
[0] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 0));
402 if (!key
->color1in_zero
) vs
->aCol
[1] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(COLOR
, 1));
405 if (key
->vertexpointsize
)
406 vs
->aPsz
= build_vs_add_input(vs
, NINE_DECLUSAGE_PSIZE
);
408 if (key
->vertexblend_indexed
|| key
->passthrough
& (1 << NINE_DECLUSAGE_BLENDINDICES
))
409 vs
->aInd
= build_vs_add_input(vs
, NINE_DECLUSAGE_BLENDINDICES
);
410 if (key
->vertexblend
|| key
->passthrough
& (1 << NINE_DECLUSAGE_BLENDWEIGHT
))
411 vs
->aWgt
= build_vs_add_input(vs
, NINE_DECLUSAGE_BLENDWEIGHT
);
412 if (key
->vertextween
) {
413 vs
->aVtx1
= build_vs_add_input(vs
, NINE_DECLUSAGE_i(POSITION
,1));
414 vs
->aNrm1
= build_vs_add_input(vs
, NINE_DECLUSAGE_i(NORMAL
,1));
419 oPos
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_POSITION
, 0); /* HPOS */
420 oCol
[0] = ureg_saturate(ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0));
421 oCol
[1] = ureg_saturate(ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 1));
422 if (key
->fog
|| key
->passthrough
& (1 << NINE_DECLUSAGE_FOG
)) {
423 oFog
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_FOG
, 0);
424 oFog
= ureg_writemask(oFog
, TGSI_WRITEMASK_X
);
427 if (key
->vertexpointsize
|| key
->pointscale
) {
428 oPsz
= ureg_DECL_output_masked(ureg
, TGSI_SEMANTIC_PSIZE
, 0,
429 TGSI_WRITEMASK_X
, 0, 1);
430 oPsz
= ureg_writemask(oPsz
, TGSI_WRITEMASK_X
);
433 if (key
->lighting
|| key
->vertexblend
)
434 AR
= ureg_DECL_address(ureg
);
436 /* === Vertex transformation / vertex blending:
439 if (key
->position_t
) {
440 if (device
->driver_caps
.window_space_position_support
) {
441 ureg_MOV(ureg
, oPos
, vs
->aVtx
);
443 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
444 /* vs->aVtx contains the coordinates buffer wise.
445 * later in the pipeline, clipping, viewport and division
446 * by w (rhw = 1/w) are going to be applied, so do the reverse
447 * of these transformations (except clipping) to have the good
448 * position at the end.*/
449 ureg_MOV(ureg
, tmp
, vs
->aVtx
);
450 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
451 ureg_ADD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
), ureg_negate(_CONST(101)));
452 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
), _CONST(100));
453 ureg_ADD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XY
), ureg_src(tmp
), ureg_imm1f(ureg
, -1.0f
));
454 /* Y needs to be reversed */
455 ureg_MOV(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_Y
), ureg_negate(ureg_src(tmp
)));
457 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_W
), _W(tmp
));
458 /* multiply X, Y, Z by w */
459 ureg_MUL(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
), _W(tmp
));
460 ureg_MOV(ureg
, oPos
, ureg_src(tmp
));
461 ureg_release_temporary(ureg
, tmp
);
463 } else if (key
->vertexblend
) {
464 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
465 struct ureg_dst tmp2
= ureg_DECL_temporary(ureg
);
466 struct ureg_dst aVtx_dst
= ureg_DECL_temporary(ureg
);
467 struct ureg_dst aNrm_dst
= ureg_DECL_temporary(ureg
);
468 struct ureg_dst sum_blendweights
= ureg_DECL_temporary(ureg
);
469 struct ureg_src cWM
[4];
471 for (i
= 160; i
<= 195; ++i
)
472 ureg_DECL_constant(ureg
, i
);
474 /* translate world matrix index to constant file index */
475 if (key
->vertexblend_indexed
) {
476 ureg_MAD(ureg
, tmp
, vs
->aInd
, ureg_imm1f(ureg
, 4.0f
), ureg_imm1f(ureg
, 160.0f
));
477 ureg_ARL(ureg
, AR
, ureg_src(tmp
));
480 ureg_MOV(ureg
, aVtx_dst
, ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 0.0f
));
481 ureg_MOV(ureg
, aNrm_dst
, ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 0.0f
));
482 ureg_MOV(ureg
, sum_blendweights
, ureg_imm4f(ureg
, 1.0f
, 1.0f
, 1.0f
, 1.0f
));
484 for (i
= 0; i
< key
->vertexblend
; ++i
) {
485 for (c
= 0; c
< 4; ++c
) {
486 cWM
[c
] = ureg_src_register(TGSI_FILE_CONSTANT
, (160 + i
* 4) * !key
->vertexblend_indexed
+ c
);
487 if (key
->vertexblend_indexed
)
488 cWM
[c
] = ureg_src_indirect(cWM
[c
], ureg_scalar(ureg_src(AR
), i
));
491 /* multiply by WORLD(index) */
492 ureg_MUL(ureg
, tmp
, _XXXX(vs
->aVtx
), cWM
[0]);
493 ureg_MAD(ureg
, tmp
, _YYYY(vs
->aVtx
), cWM
[1], ureg_src(tmp
));
494 ureg_MAD(ureg
, tmp
, _ZZZZ(vs
->aVtx
), cWM
[2], ureg_src(tmp
));
495 ureg_MAD(ureg
, tmp
, _WWWW(vs
->aVtx
), cWM
[3], ureg_src(tmp
));
498 /* Note: the spec says the transpose of the inverse of the
499 * WorldView matrices should be used, but all tests show
501 * Only case unknown: D3DVBF_0WEIGHTS */
502 ureg_MUL(ureg
, tmp2
, _XXXX(vs
->aNrm
), cWM
[0]);
503 ureg_MAD(ureg
, tmp2
, _YYYY(vs
->aNrm
), cWM
[1], ureg_src(tmp2
));
504 ureg_MAD(ureg
, tmp2
, _ZZZZ(vs
->aNrm
), cWM
[2], ureg_src(tmp2
));
507 if (i
< (key
->vertexblend
- 1)) {
508 /* accumulate weighted position value */
509 ureg_MAD(ureg
, aVtx_dst
, ureg_src(tmp
), ureg_scalar(vs
->aWgt
, i
), ureg_src(aVtx_dst
));
511 ureg_MAD(ureg
, aNrm_dst
, ureg_src(tmp2
), ureg_scalar(vs
->aWgt
, i
), ureg_src(aNrm_dst
));
512 /* subtract weighted position value for last value */
513 ureg_ADD(ureg
, sum_blendweights
, ureg_src(sum_blendweights
), ureg_negate(ureg_scalar(vs
->aWgt
, i
)));
517 /* the last weighted position is always 1 - sum_of_previous_weights */
518 ureg_MAD(ureg
, aVtx_dst
, ureg_src(tmp
), ureg_scalar(ureg_src(sum_blendweights
), key
->vertexblend
- 1), ureg_src(aVtx_dst
));
520 ureg_MAD(ureg
, aNrm_dst
, ureg_src(tmp2
), ureg_scalar(ureg_src(sum_blendweights
), key
->vertexblend
- 1), ureg_src(aNrm_dst
));
522 /* multiply by VIEW_PROJ */
523 ureg_MUL(ureg
, tmp
, _X(aVtx_dst
), _CONST(8));
524 ureg_MAD(ureg
, tmp
, _Y(aVtx_dst
), _CONST(9), ureg_src(tmp
));
525 ureg_MAD(ureg
, tmp
, _Z(aVtx_dst
), _CONST(10), ureg_src(tmp
));
526 ureg_MAD(ureg
, oPos
, _W(aVtx_dst
), _CONST(11), ureg_src(tmp
));
529 vs
->aVtx
= ureg_src(aVtx_dst
);
531 ureg_release_temporary(ureg
, tmp
);
532 ureg_release_temporary(ureg
, tmp2
);
533 ureg_release_temporary(ureg
, sum_blendweights
);
535 ureg_release_temporary(ureg
, aVtx_dst
);
538 if (key
->normalizenormals
)
539 ureg_normalize3(ureg
, aNrm_dst
, ureg_src(aNrm_dst
));
540 vs
->aNrm
= ureg_src(aNrm_dst
);
542 ureg_release_temporary(ureg
, aNrm_dst
);
544 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
546 if (key
->vertextween
) {
547 struct ureg_dst aVtx_dst
= ureg_DECL_temporary(ureg
);
548 ureg_LRP(ureg
, aVtx_dst
, _XXXX(_CONST(30)), vs
->aVtx1
, vs
->aVtx
);
549 vs
->aVtx
= ureg_src(aVtx_dst
);
551 struct ureg_dst aNrm_dst
= ureg_DECL_temporary(ureg
);
552 ureg_LRP(ureg
, aNrm_dst
, _XXXX(_CONST(30)), vs
->aNrm1
, vs
->aNrm
);
553 vs
->aNrm
= ureg_src(aNrm_dst
);
557 /* position = vertex * WORLD_VIEW_PROJ */
558 ureg_MUL(ureg
, tmp
, _XXXX(vs
->aVtx
), _CONST(0));
559 ureg_MAD(ureg
, tmp
, _YYYY(vs
->aVtx
), _CONST(1), ureg_src(tmp
));
560 ureg_MAD(ureg
, tmp
, _ZZZZ(vs
->aVtx
), _CONST(2), ureg_src(tmp
));
561 ureg_MAD(ureg
, oPos
, _WWWW(vs
->aVtx
), _CONST(3), ureg_src(tmp
));
562 ureg_release_temporary(ureg
, tmp
);
565 struct ureg_dst aVtx_dst
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_XYZ
);
566 ureg_MUL(ureg
, aVtx_dst
, _XXXX(vs
->aVtx
), _CONST(4));
567 ureg_MAD(ureg
, aVtx_dst
, _YYYY(vs
->aVtx
), _CONST(5), ureg_src(aVtx_dst
));
568 ureg_MAD(ureg
, aVtx_dst
, _ZZZZ(vs
->aVtx
), _CONST(6), ureg_src(aVtx_dst
));
569 ureg_MAD(ureg
, aVtx_dst
, _WWWW(vs
->aVtx
), _CONST(7), ureg_src(aVtx_dst
));
570 vs
->aVtx
= ureg_src(aVtx_dst
);
573 struct ureg_dst aNrm_dst
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_XYZ
);
574 ureg_MUL(ureg
, aNrm_dst
, _XXXX(vs
->aNrm
), _CONST(16));
575 ureg_MAD(ureg
, aNrm_dst
, _YYYY(vs
->aNrm
), _CONST(17), ureg_src(aNrm_dst
));
576 ureg_MAD(ureg
, aNrm_dst
, _ZZZZ(vs
->aNrm
), _CONST(18), ureg_src(aNrm_dst
));
577 if (key
->normalizenormals
)
578 ureg_normalize3(ureg
, aNrm_dst
, ureg_src(aNrm_dst
));
579 vs
->aNrm
= ureg_src(aNrm_dst
);
583 /* === Process point size:
585 if (key
->vertexpointsize
|| key
->pointscale
) {
586 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
587 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
588 struct ureg_dst tmp_y
= ureg_writemask(tmp
, TGSI_WRITEMASK_Y
);
589 struct ureg_dst tmp_z
= ureg_writemask(tmp
, TGSI_WRITEMASK_Z
);
590 if (key
->vertexpointsize
) {
591 struct ureg_src cPsz1
= ureg_DECL_constant(ureg
, 26);
592 ureg_MAX(ureg
, tmp_z
, _XXXX(vs
->aPsz
), _XXXX(cPsz1
));
593 ureg_MIN(ureg
, tmp_z
, _Z(tmp
), _YYYY(cPsz1
));
595 struct ureg_src cPsz1
= ureg_DECL_constant(ureg
, 26);
596 ureg_MOV(ureg
, tmp_z
, _ZZZZ(cPsz1
));
599 if (key
->pointscale
) {
600 struct ureg_src cPsz1
= ureg_DECL_constant(ureg
, 26);
601 struct ureg_src cPsz2
= ureg_DECL_constant(ureg
, 27);
603 ureg_DP3(ureg
, tmp_x
, vs
->aVtx
, vs
->aVtx
);
604 ureg_RSQ(ureg
, tmp_y
, _X(tmp
));
605 ureg_MUL(ureg
, tmp_y
, _Y(tmp
), _X(tmp
));
606 ureg_CMP(ureg
, tmp_y
, ureg_negate(_Y(tmp
)), _Y(tmp
), ureg_imm1f(ureg
, 0.0f
));
607 ureg_MAD(ureg
, tmp_x
, _Y(tmp
), _YYYY(cPsz2
), _XXXX(cPsz2
));
608 ureg_MAD(ureg
, tmp_x
, _Y(tmp
), _X(tmp
), _WWWW(cPsz1
));
609 ureg_RSQ(ureg
, tmp_x
, _X(tmp
));
610 ureg_MUL(ureg
, tmp_x
, _X(tmp
), _Z(tmp
));
611 ureg_MUL(ureg
, tmp_x
, _X(tmp
), _WWWW(_CONST(100)));
612 ureg_MAX(ureg
, tmp_x
, _X(tmp
), _XXXX(cPsz1
));
613 ureg_MIN(ureg
, tmp_z
, _X(tmp
), _YYYY(cPsz1
));
616 ureg_MOV(ureg
, oPsz
, _Z(tmp
));
617 ureg_release_temporary(ureg
, tmp
);
620 for (i
= 0; i
< 8; ++i
) {
621 struct ureg_dst tmp
, tmp_x
, tmp2
;
622 struct ureg_dst oTex
, input_coord
, transformed
, t
, aVtx_normed
;
623 unsigned c
, writemask
;
624 const unsigned tci
= (key
->tc_gen
>> (i
* 3)) & 0x7;
625 const unsigned idx
= (key
->tc_idx
>> (i
* 3)) & 0x7;
626 unsigned dim_input
= 1 + ((key
->tc_dim_input
>> (i
* 2)) & 0x3);
627 const unsigned dim_output
= (key
->tc_dim_output
>> (i
* 3)) & 0x7;
629 /* No texture output of index s */
630 if (tci
== NINED3DTSS_TCI_DISABLE
)
632 oTex
= ureg_DECL_output(ureg
, texcoord_sn
, i
);
633 tmp
= ureg_DECL_temporary(ureg
);
634 tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
635 input_coord
= ureg_DECL_temporary(ureg
);
636 transformed
= ureg_DECL_temporary(ureg
);
638 /* Get the coordinate */
640 case NINED3DTSS_TCI_PASSTHRU
:
641 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
642 * Else the idx is used only to determine wrapping mode. */
643 vs
->aTex
[idx
] = build_vs_add_input(vs
, NINE_DECLUSAGE_i(TEXCOORD
,idx
));
644 ureg_MOV(ureg
, input_coord
, vs
->aTex
[idx
]);
646 case NINED3DTSS_TCI_CAMERASPACENORMAL
:
647 ureg_MOV(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_XYZ
), vs
->aNrm
);
648 ureg_MOV(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
651 case NINED3DTSS_TCI_CAMERASPACEPOSITION
:
652 ureg_MOV(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_XYZ
), vs
->aVtx
);
653 ureg_MOV(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
656 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR
:
657 tmp
.WriteMask
= TGSI_WRITEMASK_XYZ
;
658 aVtx_normed
= ureg_DECL_temporary(ureg
);
659 ureg_normalize3(ureg
, aVtx_normed
, vs
->aVtx
);
660 ureg_DP3(ureg
, tmp_x
, ureg_src(aVtx_normed
), vs
->aNrm
);
661 ureg_MUL(ureg
, tmp
, vs
->aNrm
, _X(tmp
));
662 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_src(tmp
));
663 ureg_ADD(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_XYZ
), ureg_src(aVtx_normed
), ureg_negate(ureg_src(tmp
)));
664 ureg_MOV(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_W
), ureg_imm1f(ureg
, 1.0f
));
665 ureg_release_temporary(ureg
, aVtx_normed
);
667 tmp
.WriteMask
= TGSI_WRITEMASK_XYZW
;
669 case NINED3DTSS_TCI_SPHEREMAP
:
670 /* Implement the formula of GL_SPHERE_MAP */
671 tmp
.WriteMask
= TGSI_WRITEMASK_XYZ
;
672 aVtx_normed
= ureg_DECL_temporary(ureg
);
673 tmp2
= ureg_DECL_temporary(ureg
);
674 ureg_normalize3(ureg
, aVtx_normed
, vs
->aVtx
);
675 ureg_DP3(ureg
, tmp_x
, ureg_src(aVtx_normed
), vs
->aNrm
);
676 ureg_MUL(ureg
, tmp
, vs
->aNrm
, _X(tmp
));
677 ureg_ADD(ureg
, tmp
, ureg_src(tmp
), ureg_src(tmp
));
678 ureg_ADD(ureg
, tmp
, ureg_src(aVtx_normed
), ureg_negate(ureg_src(tmp
)));
679 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
680 ureg_MOV(ureg
, ureg_writemask(tmp2
, TGSI_WRITEMASK_XYZ
), ureg_src(tmp
));
681 ureg_MUL(ureg
, tmp2
, ureg_src(tmp2
), ureg_src(tmp2
));
682 ureg_DP3(ureg
, ureg_writemask(tmp2
, TGSI_WRITEMASK_X
), ureg_src(tmp2
), ureg_src(tmp2
));
683 ureg_RSQ(ureg
, ureg_writemask(tmp2
, TGSI_WRITEMASK_X
), ureg_src(tmp2
));
684 ureg_MUL(ureg
, ureg_writemask(tmp2
, TGSI_WRITEMASK_X
), ureg_src(tmp2
), ureg_imm1f(ureg
, 0.5f
));
685 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
686 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
687 ureg_MUL(ureg
, tmp
, ureg_src(tmp
), _X(tmp2
));
688 ureg_ADD(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_XY
), ureg_src(tmp
), ureg_imm1f(ureg
, 0.5f
));
689 ureg_MOV(ureg
, ureg_writemask(input_coord
, TGSI_WRITEMASK_ZW
), ureg_imm4f(ureg
, 0.0f
, 0.0f
, 0.0f
, 1.0f
));
690 ureg_release_temporary(ureg
, aVtx_normed
);
691 ureg_release_temporary(ureg
, tmp2
);
693 tmp
.WriteMask
= TGSI_WRITEMASK_XYZW
;
700 /* Apply the transformation */
701 /* dim_output == 0 => do not transform the components.
702 * XYZRHW also disables transformation */
703 if (!dim_output
|| key
->position_t
) {
704 ureg_release_temporary(ureg
, transformed
);
705 transformed
= input_coord
;
706 writemask
= TGSI_WRITEMASK_XYZW
;
708 for (c
= 0; c
< dim_output
; c
++) {
709 t
= ureg_writemask(transformed
, 1 << c
);
711 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
712 case 1: ureg_MAD(ureg
, t
, _X(input_coord
), _XXXX(_CONST(128 + i
* 4 + c
)), _YYYY(_CONST(128 + i
* 4 + c
)));
714 case 2: ureg_DP2(ureg
, t
, ureg_src(input_coord
), _CONST(128 + i
* 4 + c
));
715 ureg_ADD(ureg
, t
, ureg_src(transformed
), _ZZZZ(_CONST(128 + i
* 4 + c
)));
717 case 3: ureg_DP3(ureg
, t
, ureg_src(input_coord
), _CONST(128 + i
* 4 + c
));
718 ureg_ADD(ureg
, t
, ureg_src(transformed
), _WWWW(_CONST(128 + i
* 4 + c
)));
720 case 4: ureg_DP4(ureg
, t
, ureg_src(input_coord
), _CONST(128 + i
* 4 + c
)); break;
725 writemask
= (1 << dim_output
) - 1;
726 ureg_release_temporary(ureg
, input_coord
);
729 ureg_MOV(ureg
, ureg_writemask(oTex
, writemask
), ureg_src(transformed
));
730 ureg_release_temporary(ureg
, transformed
);
731 ureg_release_temporary(ureg
, tmp
);
736 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
737 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
738 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
740 * vec3 normal = normalize(in.Normal * NormalMatrix);
741 * vec3 hitDir = light.direction;
744 * if (light.type != DIRECTIONAL)
746 * vec3 hitVec = light.position - eyeVertex;
747 * float d = length(hitVec);
748 * hitDir = hitVec / d;
749 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
752 * if (light.type == SPOTLIGHT)
754 * float rho = dp3(-hitDir, light.direction);
755 * if (rho < cos(light.phi / 2))
757 * if (rho < cos(light.theta / 2))
758 * atten *= pow(some_func(rho), light.falloff);
761 * float nDotHit = dp3_sat(normal, hitDir);
762 * float powFact = 0.0;
766 * vec3 midVec = normalize(hitDir + eye);
767 * float nDotMid = dp3_sat(normal, midVec);
768 * powFact = pow(nDotMid, material.power);
771 * ambient += light.ambient * atten;
772 * diffuse += light.diffuse * atten * nDotHit;
773 * specular += light.specular * atten * powFact;
776 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
777 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
778 struct ureg_dst tmp_y
= ureg_writemask(tmp
, TGSI_WRITEMASK_Y
);
779 struct ureg_dst tmp_z
= ureg_writemask(tmp
, TGSI_WRITEMASK_Z
);
780 struct ureg_dst rAtt
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_W
);
781 struct ureg_dst rHit
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_XYZ
);
782 struct ureg_dst rMid
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_XYZ
);
784 struct ureg_dst rCtr
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_W
);
786 struct ureg_dst AL
= ureg_writemask(AR
, TGSI_WRITEMASK_X
);
788 /* Light.*.Alpha is not used. */
789 struct ureg_dst rD
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_XYZ
);
790 struct ureg_dst rA
= ureg_writemask(ureg_DECL_temporary(ureg
), TGSI_WRITEMASK_XYZ
);
791 struct ureg_dst rS
= ureg_DECL_temporary(ureg
);
793 struct ureg_src mtlP
= _XXXX(MATERIAL_CONST(4));
795 struct ureg_src cLKind
= _XXXX(LIGHT_CONST(0));
796 struct ureg_src cLAtt0
= _YYYY(LIGHT_CONST(0));
797 struct ureg_src cLAtt1
= _ZZZZ(LIGHT_CONST(0));
798 struct ureg_src cLAtt2
= _WWWW(LIGHT_CONST(0));
799 struct ureg_src cLColD
= _XYZW(LIGHT_CONST(1));
800 struct ureg_src cLColS
= _XYZW(LIGHT_CONST(2));
801 struct ureg_src cLColA
= _XYZW(LIGHT_CONST(3));
802 struct ureg_src cLPos
= _XYZW(LIGHT_CONST(4));
803 struct ureg_src cLRng
= _WWWW(LIGHT_CONST(4));
804 struct ureg_src cLDir
= _XYZW(LIGHT_CONST(5));
805 struct ureg_src cLFOff
= _WWWW(LIGHT_CONST(5));
806 struct ureg_src cLTht
= _XXXX(LIGHT_CONST(6));
807 struct ureg_src cLPhi
= _YYYY(LIGHT_CONST(6));
808 struct ureg_src cLSDiv
= _ZZZZ(LIGHT_CONST(6));
809 struct ureg_src cLLast
= _WWWW(LIGHT_CONST(7));
811 const unsigned loop_label
= l
++;
813 ureg_MOV(ureg
, rCtr
, ureg_imm1f(ureg
, 32.0f
)); /* &lightconst(0) */
814 ureg_MOV(ureg
, rD
, ureg_imm1f(ureg
, 0.0f
));
815 ureg_MOV(ureg
, rA
, ureg_imm1f(ureg
, 0.0f
));
816 ureg_MOV(ureg
, rS
, ureg_imm1f(ureg
, 0.0f
));
818 /* loop management */
819 ureg_BGNLOOP(ureg
, &label
[loop_label
]);
820 ureg_ARL(ureg
, AL
, _W(rCtr
));
822 /* if (not DIRECTIONAL light): */
823 ureg_SNE(ureg
, tmp_x
, cLKind
, ureg_imm1f(ureg
, D3DLIGHT_DIRECTIONAL
));
824 ureg_MOV(ureg
, rHit
, ureg_negate(cLDir
));
825 ureg_MOV(ureg
, rAtt
, ureg_imm1f(ureg
, 1.0f
));
826 ureg_IF(ureg
, _X(tmp
), &label
[l
++]);
828 /* hitDir = light.position - eyeVtx
831 ureg_ADD(ureg
, rHit
, cLPos
, ureg_negate(vs
->aVtx
));
832 ureg_DP3(ureg
, tmp_x
, ureg_src(rHit
), ureg_src(rHit
));
833 ureg_RSQ(ureg
, tmp_y
, _X(tmp
));
834 ureg_MUL(ureg
, tmp_x
, _X(tmp
), _Y(tmp
)); /* length */
836 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
837 ureg_MAD(ureg
, rAtt
, _X(tmp
), cLAtt2
, cLAtt1
);
838 ureg_MAD(ureg
, rAtt
, _X(tmp
), _W(rAtt
), cLAtt0
);
839 ureg_RCP(ureg
, rAtt
, _W(rAtt
));
840 /* cut-off if distance exceeds Light.Range */
841 ureg_SLT(ureg
, tmp_x
, _X(tmp
), cLRng
);
842 ureg_MUL(ureg
, rAtt
, _W(rAtt
), _X(tmp
));
844 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
847 /* normalize hitDir */
848 ureg_normalize3(ureg
, rHit
, ureg_src(rHit
));
850 /* if (SPOT light) */
851 ureg_SEQ(ureg
, tmp_x
, cLKind
, ureg_imm1f(ureg
, D3DLIGHT_SPOT
));
852 ureg_IF(ureg
, _X(tmp
), &label
[l
++]);
854 /* rho = dp3(-hitDir, light.spotDir)
856 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
859 * if (rho <= light.cphi2)
862 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
864 ureg_DP3(ureg
, tmp_y
, ureg_negate(ureg_src(rHit
)), cLDir
); /* rho */
865 ureg_ADD(ureg
, tmp_x
, _Y(tmp
), ureg_negate(cLPhi
));
866 ureg_MUL(ureg
, tmp_x
, _X(tmp
), cLSDiv
);
867 ureg_POW(ureg
, tmp_x
, _X(tmp
), cLFOff
); /* spotAtten */
868 ureg_SGE(ureg
, tmp_z
, _Y(tmp
), cLTht
); /* if inside theta && phi */
869 ureg_SGE(ureg
, tmp_y
, _Y(tmp
), cLPhi
); /* if inside phi */
870 ureg_MAD(ureg
, ureg_saturate(tmp_x
), _X(tmp
), _Y(tmp
), _Z(tmp
));
871 ureg_MUL(ureg
, rAtt
, _W(rAtt
), _X(tmp
));
873 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
876 /* directional factors, let's not use LIT because of clarity */
879 if (key
->localviewer
) {
880 ureg_normalize3(ureg
, rMid
, vs
->aVtx
);
881 ureg_ADD(ureg
, rMid
, ureg_src(rHit
), ureg_negate(ureg_src(rMid
)));
883 ureg_ADD(ureg
, rMid
, ureg_src(rHit
), ureg_imm3f(ureg
, 0.0f
, 0.0f
, -1.0f
));
885 ureg_normalize3(ureg
, rMid
, ureg_src(rMid
));
886 ureg_DP3(ureg
, ureg_saturate(tmp_x
), vs
->aNrm
, ureg_src(rHit
));
887 ureg_DP3(ureg
, ureg_saturate(tmp_y
), vs
->aNrm
, ureg_src(rMid
));
888 ureg_MUL(ureg
, tmp_z
, _X(tmp
), _Y(tmp
));
889 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
890 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
891 * No tests were made for backfacing, so add the two conditions */
892 ureg_IF(ureg
, _Z(tmp
), &label
[l
++]);
894 ureg_DP3(ureg
, ureg_saturate(tmp_y
), vs
->aNrm
, ureg_src(rMid
));
895 ureg_POW(ureg
, tmp_y
, _Y(tmp
), mtlP
);
896 ureg_MUL(ureg
, tmp_y
, _W(rAtt
), _Y(tmp
)); /* power factor * att */
897 ureg_MAD(ureg
, rS
, cLColS
, _Y(tmp
), ureg_src(rS
)); /* accumulate specular */
899 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
902 ureg_MUL(ureg
, tmp_x
, _W(rAtt
), _X(tmp
)); /* dp3(normal,hitDir) * att */
903 ureg_MAD(ureg
, rD
, cLColD
, _X(tmp
), ureg_src(rD
)); /* accumulate diffuse */
906 ureg_MAD(ureg
, rA
, cLColA
, _W(rAtt
), ureg_src(rA
)); /* accumulate ambient */
908 /* break if this was the last light */
909 ureg_IF(ureg
, cLLast
, &label
[l
++]);
912 ureg_fixup_label(ureg
, label
[l
-1], ureg_get_instruction_number(ureg
));
914 ureg_ADD(ureg
, rCtr
, _W(rCtr
), ureg_imm1f(ureg
, 8.0f
));
915 ureg_fixup_label(ureg
, label
[loop_label
], ureg_get_instruction_number(ureg
));
916 ureg_ENDLOOP(ureg
, &label
[loop_label
]);
918 /* Apply to material:
920 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
921 * material.ambient * ambient +
922 * material.diffuse * diffuse;
923 * oCol[1] = material.specular * specular;
925 if (key
->mtl_emissive
== 0 && key
->mtl_ambient
== 0)
926 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(rA
), vs
->mtlA
, _CONST(19));
928 ureg_ADD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), ureg_src(rA
), _CONST(25));
929 ureg_MAD(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_XYZ
), vs
->mtlA
, ureg_src(tmp
), vs
->mtlE
);
932 ureg_MAD(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_XYZ
), ureg_src(rD
), vs
->mtlD
, ureg_src(tmp
));
933 ureg_MOV(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_W
), vs
->mtlD
);
934 ureg_MUL(ureg
, oCol
[1], ureg_src(rS
), vs
->mtlS
);
935 ureg_release_temporary(ureg
, rAtt
);
936 ureg_release_temporary(ureg
, rHit
);
937 ureg_release_temporary(ureg
, rMid
);
938 ureg_release_temporary(ureg
, rCtr
);
939 ureg_release_temporary(ureg
, rD
);
940 ureg_release_temporary(ureg
, rA
);
941 ureg_release_temporary(ureg
, rS
);
942 ureg_release_temporary(ureg
, rAtt
);
943 ureg_release_temporary(ureg
, tmp
);
947 if (key
->mtl_emissive
== 0 && key
->mtl_ambient
== 0)
948 ureg_MOV(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_XYZ
), _CONST(19));
950 ureg_MAD(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_XYZ
), vs
->mtlA
, _CONST(25), vs
->mtlE
);
951 ureg_MOV(ureg
, ureg_writemask(oCol
[0], TGSI_WRITEMASK_W
), vs
->mtlD
);
952 ureg_MOV(ureg
, oCol
[1], ureg_imm1f(ureg
, 0.0f
));
954 ureg_MOV(ureg
, oCol
[0], vs
->aCol
[0]);
955 ureg_MOV(ureg
, oCol
[1], vs
->aCol
[1]);
960 * exp(x) = ex2(log2(e) * x)
963 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
964 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
965 struct ureg_dst tmp_z
= ureg_writemask(tmp
, TGSI_WRITEMASK_Z
);
966 if (key
->fog_range
) {
967 ureg_DP3(ureg
, tmp_x
, vs
->aVtx
, vs
->aVtx
);
968 ureg_RSQ(ureg
, tmp_z
, _X(tmp
));
969 ureg_MUL(ureg
, tmp_z
, _Z(tmp
), _X(tmp
));
971 ureg_MOV(ureg
, tmp_z
, ureg_abs(_ZZZZ(vs
->aVtx
)));
974 if (key
->fog_mode
== D3DFOG_EXP
) {
975 ureg_MUL(ureg
, tmp_x
, _Z(tmp
), _ZZZZ(_CONST(28)));
976 ureg_MUL(ureg
, tmp_x
, _X(tmp
), ureg_imm1f(ureg
, -1.442695f
));
977 ureg_EX2(ureg
, tmp_x
, _X(tmp
));
979 if (key
->fog_mode
== D3DFOG_EXP2
) {
980 ureg_MUL(ureg
, tmp_x
, _Z(tmp
), _ZZZZ(_CONST(28)));
981 ureg_MUL(ureg
, tmp_x
, _X(tmp
), _X(tmp
));
982 ureg_MUL(ureg
, tmp_x
, _X(tmp
), ureg_imm1f(ureg
, -1.442695f
));
983 ureg_EX2(ureg
, tmp_x
, _X(tmp
));
985 if (key
->fog_mode
== D3DFOG_LINEAR
) {
986 ureg_ADD(ureg
, tmp_x
, _XXXX(_CONST(28)), ureg_negate(_Z(tmp
)));
987 ureg_MUL(ureg
, ureg_saturate(tmp_x
), _X(tmp
), _YYYY(_CONST(28)));
989 ureg_MOV(ureg
, oFog
, _X(tmp
));
990 ureg_release_temporary(ureg
, tmp
);
991 } else if (key
->fog
&& !(key
->passthrough
& (1 << NINE_DECLUSAGE_FOG
))) {
992 ureg_MOV(ureg
, oFog
, ureg_scalar(vs
->aCol
[1], TGSI_SWIZZLE_W
));
995 if (key
->passthrough
& (1 << NINE_DECLUSAGE_BLENDWEIGHT
)) {
996 struct ureg_src input
;
997 struct ureg_dst output
;
999 output
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_GENERIC
, 18);
1000 ureg_MOV(ureg
, output
, input
);
1002 if (key
->passthrough
& (1 << NINE_DECLUSAGE_BLENDINDICES
)) {
1003 struct ureg_src input
;
1004 struct ureg_dst output
;
1006 output
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_GENERIC
, 19);
1007 ureg_MOV(ureg
, output
, input
);
1009 if (key
->passthrough
& (1 << NINE_DECLUSAGE_NORMAL
)) {
1010 struct ureg_src input
;
1011 struct ureg_dst output
;
1013 output
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_GENERIC
, 20);
1014 ureg_MOV(ureg
, output
, input
);
1016 if (key
->passthrough
& (1 << NINE_DECLUSAGE_TANGENT
)) {
1017 struct ureg_src input
;
1018 struct ureg_dst output
;
1019 input
= build_vs_add_input(vs
, NINE_DECLUSAGE_TANGENT
);
1020 output
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_GENERIC
, 21);
1021 ureg_MOV(ureg
, output
, input
);
1023 if (key
->passthrough
& (1 << NINE_DECLUSAGE_BINORMAL
)) {
1024 struct ureg_src input
;
1025 struct ureg_dst output
;
1026 input
= build_vs_add_input(vs
, NINE_DECLUSAGE_BINORMAL
);
1027 output
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_GENERIC
, 22);
1028 ureg_MOV(ureg
, output
, input
);
1030 if (key
->passthrough
& (1 << NINE_DECLUSAGE_FOG
)) {
1031 struct ureg_src input
;
1032 struct ureg_dst output
;
1033 input
= build_vs_add_input(vs
, NINE_DECLUSAGE_FOG
);
1034 input
= ureg_scalar(input
, TGSI_SWIZZLE_X
);
1036 ureg_MOV(ureg
, output
, input
);
1038 if (key
->passthrough
& (1 << NINE_DECLUSAGE_DEPTH
)) {
1039 (void) 0; /* TODO: replace z of position output ? */
1042 /* ucp for ff applies on world coordinates.
1043 * aVtx is in worldview coordinates. */
1045 struct ureg_dst clipVect
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_CLIPVERTEX
, 0);
1046 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
1047 ureg_MUL(ureg
, tmp
, _XXXX(vs
->aVtx
), _CONST(12));
1048 ureg_MAD(ureg
, tmp
, _YYYY(vs
->aVtx
), _CONST(13), ureg_src(tmp
));
1049 ureg_MAD(ureg
, tmp
, _ZZZZ(vs
->aVtx
), _CONST(14), ureg_src(tmp
));
1050 ureg_ADD(ureg
, clipVect
, _CONST(15), ureg_src(tmp
));
1051 ureg_release_temporary(ureg
, tmp
);
1054 if (key
->position_t
&& device
->driver_caps
.window_space_position_support
)
1055 ureg_property(ureg
, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION
, TRUE
);
1058 nine_ureg_tgsi_dump(ureg
, FALSE
);
1059 return ureg_create_shader_and_destroy(ureg
, device
->context
.pipe
);
1062 /* PS FF constants layout:
1064 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1065 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1066 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1067 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1068 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1069 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
1070 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1072 * CONST[20] D3DRS_TEXTUREFACTOR
1073 * CONST[21] D3DRS_FOGCOLOR
1074 * CONST[22].x___ RS.FogEnd
1075 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1076 * CONST[22].__z_ RS.FogDensity
1080 struct ureg_program
*ureg
;
1082 struct ureg_src vC
[2]; /* DIFFUSE, SPECULAR */
1083 struct ureg_src vT
[8]; /* TEXCOORD[i] */
1084 struct ureg_dst rCur
; /* D3DTA_CURRENT */
1085 struct ureg_dst rMod
;
1086 struct ureg_src rCurSrc
;
1087 struct ureg_dst rTmp
; /* D3DTA_TEMP */
1088 struct ureg_src rTmpSrc
;
1089 struct ureg_dst rTex
;
1090 struct ureg_src rTexSrc
;
1091 struct ureg_src cBEM
[8];
1092 struct ureg_src s
[8];
1096 unsigned index_pre_mod
;
/* Translate a texture-stage argument selector `ta` (D3DTA_* value, possibly
 * combined with the D3DTA_COMPLEMENT / D3DTA_ALPHAREPLICATE modifier bits)
 * into the TGSI source register that holds that argument.
 *
 * ps: pixel shader build context (provides the ureg program, the current
 *     stage index and the CURRENT/TEMP/TEXTURE registers).
 * ta: selector; (ta & D3DTA_SELECTMASK) picks the register.
 *
 * Unknown selectors yield ureg_src_undef().
 * NOTE(review): several case labels and the final return are not visible
 * here; the per-assignment notes below marked "presumably" are inferred from
 * the register being assigned. */
1100 static struct ureg_src
1101 ps_get_ts_arg(struct ps_build_ctx
*ps
, unsigned ta
)
1103 struct ureg_src reg
;
1105 switch (ta
& D3DTA_SELECTMASK
) {
1106 case D3DTA_CONSTANT
:
/* Per-stage constant: constant slot number == current stage index. */
1107 reg
= ureg_DECL_constant(ps
->ureg
, ps
->stage
.index
);
/* Presumably D3DTA_CURRENT: the stage right after a PREMODULATE stage reads
 * the pre-modulated register rMod instead of the plain current register. */
1110 reg
= (ps
->stage
.index
== ps
->stage
.index_pre_mod
) ? ureg_src(ps
->rMod
) : ps
->rCurSrc
;
/* Presumably D3DTA_DIFFUSE: interpolated COLOR input 0. */
1113 reg
= ureg_DECL_fs_input(ps
->ureg
, TGSI_SEMANTIC_COLOR
, 0, TGSI_INTERPOLATE_COLOR
);
1115 case D3DTA_SPECULAR
:
1116 reg
= ureg_DECL_fs_input(ps
->ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_COLOR
);
/* CONST[20] is D3DRS_TEXTUREFACTOR in the PS constants layout; presumably
 * this is the D3DTA_TFACTOR case. */
1125 reg
= ureg_DECL_constant(ps
->ureg
, 20);
/* Fallback for selectors that match no case. */
1129 reg
= ureg_src_undef();
/* Complement modifier: replace reg by (1 - reg), computed into a fresh
 * temporary. NOTE(review): no release of this temporary is visible. */
1132 if (ta
& D3DTA_COMPLEMENT
) {
1133 struct ureg_dst dst
= ureg_DECL_temporary(ps
->ureg
);
1134 ureg_ADD(ps
->ureg
, dst
, ureg_imm1f(ps
->ureg
, 1.0f
), ureg_negate(reg
));
1135 reg
= ureg_src(dst
);
/* Alpha-replicate modifier is tested last, i.e. after the complement. */
1137 if (ta
& D3DTA_ALPHAREPLICATE
)
/* Translate a result-argument selector into the TGSI destination register a
 * stage writes to. The selector must not carry the COMPLEMENT or
 * ALPHAREPLICATE modifier bits (asserted). The stage loop calls this with
 * either D3DTA_TEMP or D3DTA_CURRENT.
 * Returns ureg_dst_undef() when the selector matches no case.
 * NOTE(review): the case labels of the switch are not visible here. */
1142 static struct ureg_dst
1143 ps_get_ts_dst(struct ps_build_ctx
*ps
, unsigned ta
)
1145 assert(!(ta
& (D3DTA_COMPLEMENT
| D3DTA_ALPHAREPLICATE
)));
1147 switch (ta
& D3DTA_SELECTMASK
) {
1154 return ureg_dst_undef();
/* Return a bitmask describing which of the three stage arguments the texture
 * operation `top` reads. Callers test bit 0x1 for arg[0], 0x2 for arg[1] and
 * 0x4 for arg[2], and only fetch/record the arguments whose bit is set.
 * NOTE(review): the values returned for each group of cases are not visible
 * here. */
1158 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top
)
1161 case D3DTOP_DISABLE
:
1163 case D3DTOP_SELECTARG1
:
1164 case D3DTOP_PREMODULATE
:
1166 case D3DTOP_SELECTARG2
:
1168 case D3DTOP_MULTIPLYADD
:
/* Tell whether "MOV dst, src" would have no effect, so the caller can skip
 * emitting it.
 *
 * True when dst writes no component at all, or when dst and src name the same
 * register (same File and Index) and every component enabled in
 * dst.WriteMask is read from the identically named src component (identity
 * swizzle on the written channels).
 * NOTE(review): further conjuncts between the Index test and the swizzle
 * tests are not visible here. */
1176 static inline boolean
1177 is_MOV_no_op(struct ureg_dst dst
, struct ureg_src src
)
1179 return !dst
.WriteMask
||
1180 (dst
.File
== src
.File
&&
1181 dst
.Index
== src
.Index
&&
/* A channel only matters if it is written; written channels must map x->x,
 * y->y, z->z, w->w. */
1187 (!(dst
.WriteMask
& TGSI_WRITEMASK_X
) || (src
.SwizzleX
== TGSI_SWIZZLE_X
)) &&
1188 (!(dst
.WriteMask
& TGSI_WRITEMASK_Y
) || (src
.SwizzleY
== TGSI_SWIZZLE_Y
)) &&
1189 (!(dst
.WriteMask
& TGSI_WRITEMASK_Z
) || (src
.SwizzleZ
== TGSI_SWIZZLE_Z
)) &&
1190 (!(dst
.WriteMask
& TGSI_WRITEMASK_W
) || (src
.SwizzleW
== TGSI_SWIZZLE_W
)));
/* Emit the TGSI instructions for one texture-stage operation.
 *
 * ps:  pixel shader build context.
 * top: the D3DTOP_* operation to emit.
 * dst: destination register; its WriteMask selects the channels written
 *      (the caller uses xyz for the color op and w for the alpha op, or xyzw
 *      when both are identical).
 * arg: the three stage arguments, arg[0]..arg[2]; only those the op reads
 *      need to be valid.
 *
 * Two scratch temporaries are declared on entry and released on exit.
 * An unknown op triggers assert(!"invalid D3DTOP"). */
1195 ps_do_ts_op(struct ps_build_ctx
*ps
, unsigned top
, struct ureg_dst dst
, struct ureg_src
*arg
)
1197 struct ureg_program
*ureg
= ps
->ureg
;
1198 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
1199 struct ureg_dst tmp2
= ureg_DECL_temporary(ureg
);
/* tmp_x is taken before tmp's mask is changed, so it always writes .x only. */
1200 struct ureg_dst tmp_x
= ureg_writemask(tmp
, TGSI_WRITEMASK_X
);
/* Intermediate results are only needed on the channels dst writes. */
1202 tmp
.WriteMask
= dst
.WriteMask
;
/* The result is saturated unless the op is one of those listed in this
 * condition. */
1204 if (top
!= D3DTOP_SELECTARG1
&& top
!= D3DTOP_SELECTARG2
&&
1205 top
!= D3DTOP_MODULATE
&& top
!= D3DTOP_PREMODULATE
&&
1206 top
!= D3DTOP_BLENDDIFFUSEALPHA
&& top
!= D3DTOP_BLENDTEXTUREALPHA
&&
1207 top
!= D3DTOP_BLENDFACTORALPHA
&& top
!= D3DTOP_BLENDCURRENTALPHA
&&
1208 top
!= D3DTOP_BUMPENVMAP
&& top
!= D3DTOP_BUMPENVMAPLUMINANCE
&&
1210 dst
= ureg_saturate(dst
);
/* dst = arg1 (skipped when the move would be a no-op) */
1213 case D3DTOP_SELECTARG1
:
1214 if (!is_MOV_no_op(dst
, arg
[1]))
1215 ureg_MOV(ureg
, dst
, arg
[1]);
/* dst = arg2 (skipped when the move would be a no-op) */
1217 case D3DTOP_SELECTARG2
:
1218 if (!is_MOV_no_op(dst
, arg
[2]))
1219 ureg_MOV(ureg
, dst
, arg
[2]);
/* dst = arg1 * arg2 */
1221 case D3DTOP_MODULATE
:
1222 ureg_MUL(ureg
, dst
, arg
[1], arg
[2]);
/* dst = 2 * (arg1 * arg2), computed as t + t */
1224 case D3DTOP_MODULATE2X
:
1225 ureg_MUL(ureg
, tmp
, arg
[1], arg
[2]);
1226 ureg_ADD(ureg
, dst
, ureg_src(tmp
), ureg_src(tmp
));
/* dst = 4 * (arg1 * arg2) */
1228 case D3DTOP_MODULATE4X
:
1229 ureg_MUL(ureg
, tmp
, arg
[1], arg
[2]);
1230 ureg_MUL(ureg
, dst
, ureg_src(tmp
), ureg_imm1f(ureg
, 4.0f
));
/* dst = arg1 + arg2 (case label not visible here) */
1233 ureg_ADD(ureg
, dst
, arg
[1], arg
[2]);
/* dst = arg1 + arg2 - 0.5 */
1235 case D3DTOP_ADDSIGNED
:
1236 ureg_ADD(ureg
, tmp
, arg
[1], arg
[2]);
1237 ureg_ADD(ureg
, dst
, ureg_src(tmp
), ureg_imm1f(ureg
, -0.5f
));
/* dst = 2 * (arg1 + arg2) - 1 */
1239 case D3DTOP_ADDSIGNED2X
:
1240 ureg_ADD(ureg
, tmp
, arg
[1], arg
[2]);
1241 ureg_MAD(ureg
, dst
, ureg_src(tmp
), ureg_imm1f(ureg
, 2.0f
), ureg_imm1f(ureg
, -1.0f
));
/* dst = arg1 - arg2 */
1243 case D3DTOP_SUBTRACT
:
1244 ureg_ADD(ureg
, dst
, arg
[1], ureg_negate(arg
[2]));
/* dst = (1 - arg1) * arg2 + arg1 */
1246 case D3DTOP_ADDSMOOTH
:
1247 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(arg
[1]));
1248 ureg_MAD(ureg
, dst
, ureg_src(tmp
), arg
[2], arg
[1]);
/* Blend arg1/arg2 with LRP, weight = alpha of the diffuse color input. */
1250 case D3DTOP_BLENDDIFFUSEALPHA
:
1251 ureg_LRP(ureg
, dst
, _WWWW(ps
->vC
[0]), arg
[1], arg
[2]);
/* Same, weight = alpha of the texture register rTex. */
1253 case D3DTOP_BLENDTEXTUREALPHA
:
1254 /* XXX: alpha taken from previous stage, texture or result ? */
1255 ureg_LRP(ureg
, dst
, _W(ps
->rTex
), arg
[1], arg
[2]);
/* Same, weight = alpha of CONST[20] (D3DRS_TEXTUREFACTOR). */
1257 case D3DTOP_BLENDFACTORALPHA
:
1258 ureg_LRP(ureg
, dst
, _WWWW(_CONST(20)), arg
[1], arg
[2]);
/* dst = arg2 * (1 - rTex.w) + arg1 */
1260 case D3DTOP_BLENDTEXTUREALPHAPM
:
1261 ureg_ADD(ureg
, tmp_x
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(_W(ps
->rTex
)));
1262 ureg_MAD(ureg
, dst
, arg
[2], _X(tmp
), arg
[1]);
/* Blend arg1/arg2 with LRP, weight = alpha of the current register. */
1264 case D3DTOP_BLENDCURRENTALPHA
:
1265 ureg_LRP(ureg
, dst
, _WWWW(ps
->rCurSrc
), arg
[1], arg
[2]);
/* dst = arg1, and remember that the NEXT stage (index + 1) must read the
 * pre-modulated value when it asks for the current register. */
1267 case D3DTOP_PREMODULATE
:
1268 ureg_MOV(ureg
, dst
, arg
[1]);
1269 ps
->stage
.index_pre_mod
= ps
->stage
.index
+ 1;
/* dst = arg1.w * arg2 + arg1 */
1271 case D3DTOP_MODULATEALPHA_ADDCOLOR
:
1272 ureg_MAD(ureg
, dst
, _WWWW(arg
[1]), arg
[2], arg
[1]);
/* dst = arg1 * arg2 + arg1.w */
1274 case D3DTOP_MODULATECOLOR_ADDALPHA
:
1275 ureg_MAD(ureg
, dst
, arg
[1], arg
[2], _WWWW(arg
[1]));
/* dst = (1 - arg1.w) * arg2 + arg1 */
1277 case D3DTOP_MODULATEINVALPHA_ADDCOLOR
:
1278 ureg_ADD(ureg
, tmp_x
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(_WWWW(arg
[1])));
1279 ureg_MAD(ureg
, dst
, _X(tmp
), arg
[2], arg
[1]);
/* dst = (1 - arg1) * arg2 + arg1.w */
1281 case D3DTOP_MODULATEINVCOLOR_ADDALPHA
:
1282 ureg_ADD(ureg
, tmp
, ureg_imm1f(ureg
, 1.0f
), ureg_negate(arg
[1]));
1283 ureg_MAD(ureg
, dst
, ureg_src(tmp
), arg
[2], _WWWW(arg
[1]));
/* No instruction is visible here for the two bump-map ops; the coordinate
 * perturbation they cause is emitted by the shader builder for the
 * following stage. */
1285 case D3DTOP_BUMPENVMAP
:
1287 case D3DTOP_BUMPENVMAPLUMINANCE
:
/* dst = saturate(4 * dot3(arg1 - 0.5, arg2 - 0.5)) */
1289 case D3DTOP_DOTPRODUCT3
:
1290 ureg_ADD(ureg
, tmp
, arg
[1], ureg_imm4f(ureg
,-0.5,-0.5,-0.5,-0.5));
1291 ureg_ADD(ureg
, tmp2
, arg
[2] , ureg_imm4f(ureg
,-0.5,-0.5,-0.5,-0.5));
1292 ureg_DP3(ureg
, tmp
, ureg_src(tmp
), ureg_src(tmp2
));
1293 ureg_MUL(ureg
, ureg_saturate(dst
), ureg_src(tmp
), ureg_imm4f(ureg
,4.0,4.0,4.0,4.0));
/* dst = arg1 * arg2 + arg0 */
1295 case D3DTOP_MULTIPLYADD
:
1296 ureg_MAD(ureg
, dst
, arg
[1], arg
[2], arg
[0]);
/* Blend arg1/arg2 with LRP, weight = arg0 (case label not visible here). */
1299 ureg_LRP(ureg
, dst
, arg
[0], arg
[1], arg
[2]);
1301 case D3DTOP_DISABLE
:
1305 assert(!"invalid D3DTOP");
1308 ureg_release_temporary(ureg
, tmp
);
1309 ureg_release_temporary(ureg
, tmp2
);
/* Build the fixed-function pixel shader described by `key`.
 *
 * device: provides the screen (capability queries, texcoord semantic) and
 *         the pipe context used to create the shader.
 * key:    per-stage ops/arguments plus the fog and projection settings.
 *
 * Outline of the generated program:
 *   - current := diffuse color input, temp := 0, texture := 0
 *   - a first loop over the 8 stages declares the COLOR1 input, samplers and
 *     texcoord inputs the enabled stages refer to
 *   - a second loop emits, per stage, the texture fetch (with optional
 *     bump-map coordinate offset and projection) followed by the color op
 *     and, if different, the alpha op
 *   - COLOR1 is added to current.xyz (NOTE(review): its guarding condition
 *     is not visible here), then fog is blended in and color output 0 is
 *     written
 *
 * Returns whatever ureg_create_shader_and_destroy() returns for the program.
 * NOTE(review): delta and the perturbed texture_coord temporaries have no
 * visible release. */
1313 nine_ff_build_ps(struct NineDevice9
*device
, struct nine_ff_ps_key
*key
)
1315 struct ps_build_ctx ps
;
1316 struct ureg_program
*ureg
= ureg_create(PIPE_SHADER_FRAGMENT
);
1317 struct ureg_dst oCol
;
1319 const unsigned texcoord_sn
= get_texcoord_sn(device
->screen
);
1321 memset(&ps
, 0, sizeof(ps
));
/* -1 so that no stage matches index_pre_mod until a PREMODULATE op sets it. */
1323 ps
.stage
.index_pre_mod
= -1;
1325 ps
.vC
[0] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 0, TGSI_INTERPOLATE_COLOR
);
1327 ps
.rCur
= ureg_DECL_temporary(ureg
);
1328 ps
.rTmp
= ureg_DECL_temporary(ureg
);
1329 ps
.rTex
= ureg_DECL_temporary(ureg
);
1330 ps
.rCurSrc
= ureg_src(ps
.rCur
);
1331 ps
.rTmpSrc
= ureg_src(ps
.rTmp
);
1332 ps
.rTexSrc
= ureg_src(ps
.rTex
);
1334 /* Initial values */
1335 ureg_MOV(ureg
, ps
.rCur
, ps
.vC
[0]);
1336 ureg_MOV(ureg
, ps
.rTmp
, ureg_imm1f(ureg
, 0.0f
));
1337 ureg_MOV(ureg
, ps
.rTex
, ureg_imm1f(ureg
, 0.0f
));
/* Pass 1: declare the inputs and samplers that the enabled stages use. A
 * sampler left as ureg_src_undef() means "no texture fetch for this stage". */
1339 for (s
= 0; s
< 8; ++s
) {
1340 ps
.s
[s
] = ureg_src_undef();
1342 if (key
->ts
[s
].colorop
!= D3DTOP_DISABLE
) {
1343 if (key
->ts
[s
].colorarg0
== D3DTA_SPECULAR
||
1344 key
->ts
[s
].colorarg1
== D3DTA_SPECULAR
||
1345 key
->ts
[s
].colorarg2
== D3DTA_SPECULAR
)
1346 ps
.vC
[1] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_COLOR
);
1348 if (key
->ts
[s
].colorarg0
== D3DTA_TEXTURE
||
1349 key
->ts
[s
].colorarg1
== D3DTA_TEXTURE
||
1350 key
->ts
[s
].colorarg2
== D3DTA_TEXTURE
) {
1351 ps
.s
[s
] = ureg_DECL_sampler(ureg
, s
);
1352 ps
.vT
[s
] = ureg_DECL_fs_input(ureg
, texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
/* A stage following a PREMODULATE stage also needs its sampler. */
1354 if (s
&& (key
->ts
[s
- 1].colorop
== D3DTOP_PREMODULATE
||
1355 key
->ts
[s
- 1].alphaop
== D3DTOP_PREMODULATE
))
1356 ps
.s
[s
] = ureg_DECL_sampler(ureg
, s
);
1359 if (key
->ts
[s
].alphaop
!= D3DTOP_DISABLE
) {
1360 if (key
->ts
[s
].alphaarg0
== D3DTA_SPECULAR
||
1361 key
->ts
[s
].alphaarg1
== D3DTA_SPECULAR
||
1362 key
->ts
[s
].alphaarg2
== D3DTA_SPECULAR
)
1363 ps
.vC
[1] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_COLOR
);
1365 if (key
->ts
[s
].alphaarg0
== D3DTA_TEXTURE
||
1366 key
->ts
[s
].alphaarg1
== D3DTA_TEXTURE
||
1367 key
->ts
[s
].alphaarg2
== D3DTA_TEXTURE
) {
1368 ps
.s
[s
] = ureg_DECL_sampler(ureg
, s
);
1369 ps
.vT
[s
] = ureg_DECL_fs_input(ureg
, texcoord_sn
, s
, TGSI_INTERPOLATE_PERSPECTIVE
);
1374 ps
.vC
[1] = ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_COLOR
, 1, TGSI_INTERPOLATE_COLOR
);
1376 oCol
= ureg_DECL_output(ureg
, TGSI_SEMANTIC_COLOR
, 0);
/* Pass 2: emit the code of each stage. */
1380 for (s
= 0; s
< 8; ++s
) {
1381 unsigned colorarg
[3];
1382 unsigned alphaarg
[3];
1383 const uint8_t used_c
= ps_d3dtop_args_mask(key
->ts
[s
].colorop
);
1384 const uint8_t used_a
= ps_d3dtop_args_mask(key
->ts
[s
].alphaop
);
1385 struct ureg_dst dst
;
1386 struct ureg_src arg
[3];
1388 if (key
->ts
[s
].colorop
== D3DTOP_DISABLE
) {
1389 assert (key
->ts
[s
].alphaop
== D3DTOP_DISABLE
);
1394 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s
,
1395 nine_D3DTOP_to_str(key
->ts
[s
].colorop
),
1396 nine_D3DTOP_to_str(key
->ts
[s
].alphaop
));
/* Texture fetch, only for stages that got a sampler in pass 1. */
1398 if (!ureg_src_is_undef(ps
.s
[s
])) {
1400 struct ureg_src texture_coord
= ps
.vT
[s
];
1401 struct ureg_dst delta
;
1402 switch (key
->ts
[s
].textarget
) {
1403 case 0: target
= TGSI_TEXTURE_1D
; break;
1404 case 1: target
= TGSI_TEXTURE_2D
; break;
1405 case 2: target
= TGSI_TEXTURE_3D
; break;
1406 case 3: target
= TGSI_TEXTURE_CUBE
; break;
1407 /* this is a 2 bit bitfield, do I really need a default case ? */
1410 /* Modify coordinates */
1412 (key
->ts
[s
-1].colorop
== D3DTOP_BUMPENVMAP
||
1413 key
->ts
[s
-1].colorop
== D3DTOP_BUMPENVMAPLUMINANCE
)) {
1414 delta
= ureg_DECL_temporary(ureg
);
1415 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1416 ureg_MUL(ureg
, ureg_writemask(delta
, TGSI_WRITEMASK_X
), _X(ps
.rTex
), _XXXX(_CONST(8 + s
- 1)));
1417 ureg_MAD(ureg
, ureg_writemask(delta
, TGSI_WRITEMASK_X
), _Y(ps
.rTex
), _ZZZZ(_CONST(8 + s
- 1)), ureg_src(delta
));
1418 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1419 ureg_MUL(ureg
, ureg_writemask(delta
, TGSI_WRITEMASK_Y
), _X(ps
.rTex
), _YYYY(_CONST(8 + s
- 1)));
1420 ureg_MAD(ureg
, ureg_writemask(delta
, TGSI_WRITEMASK_Y
), _Y(ps
.rTex
), _WWWW(_CONST(8 + s
- 1)), ureg_src(delta
));
/* Work on a copy of the interpolated coordinate and offset its xy by delta. */
1421 texture_coord
= ureg_src(ureg_DECL_temporary(ureg
));
1422 ureg_MOV(ureg
, ureg_writemask(ureg_dst(texture_coord
), ureg_dst(ps
.vT
[s
]).WriteMask
), ps
.vT
[s
]);
1423 ureg_ADD(ureg
, ureg_writemask(ureg_dst(texture_coord
), TGSI_WRITEMASK_XY
), texture_coord
, ureg_src(delta
));
1424 /* Prepare luminance multiplier
1425 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1426 if (key
->ts
[s
-1].colorop
== D3DTOP_BUMPENVMAPLUMINANCE
) {
/* Two stages share one constant: even stages use .x/.y, odd stages .z/.w
 * of CONST[16 + stage / 2]. */
1427 struct ureg_src bumpenvlscale
= ((s
-1) & 1) ? _ZZZZ(_CONST(16 + (s
-1) / 2)) : _XXXX(_CONST(16 + (s
-1) / 2));
1428 struct ureg_src bumpenvloffset
= ((s
-1) & 1) ? _WWWW(_CONST(16 + (s
-1) / 2)) : _YYYY(_CONST(16 + (s
-1) / 2));
/* delta.x is reused to hold the luminance factor from here on. */
1430 ureg_MAD(ureg
, ureg_saturate(ureg_writemask(delta
, TGSI_WRITEMASK_X
)), _Z(ps
.rTex
), bumpenvlscale
, bumpenvloffset
);
/* Projected lookup: key->projected holds 2 bits per stage; dim is the
 * 1-based index of the coordinate component used as divisor. */
1433 if (key
->projected
& (3 << (s
*2))) {
1434 unsigned dim
= 1 + ((key
->projected
>> (2 * s
)) & 3);
1436 ureg_TXP(ureg
, ps
.rTex
, target
, texture_coord
, ps
.s
[s
]);
/* Manual projection: multiply the coordinate by 1 / coord[dim-1], using
 * rTmp as scratch, then sample. */
1438 struct ureg_dst tmp
= ureg_DECL_temporary(ureg
);
1439 ureg_RCP(ureg
, ureg_writemask(tmp
, TGSI_WRITEMASK_X
), ureg_scalar(texture_coord
, dim
-1));
1440 ureg_MUL(ureg
, ps
.rTmp
, _X(tmp
), texture_coord
);
1441 ureg_TEX(ureg
, ps
.rTex
, target
, ps
.rTmpSrc
, ps
.s
[s
]);
1442 ureg_release_temporary(ureg
, tmp
);
1445 ureg_TEX(ureg
, ps
.rTex
, target
, texture_coord
, ps
.s
[s
]);
1447 if (s
>= 1 && key
->ts
[s
-1].colorop
== D3DTOP_BUMPENVMAPLUMINANCE
)
1448 ureg_MUL(ureg
, ps
.rTex
, ureg_src(ps
.rTex
), _X(delta
));
1451 if (key
->ts
[s
].colorop
== D3DTOP_BUMPENVMAP
||
1452 key
->ts
[s
].colorop
== D3DTOP_BUMPENVMAPLUMINANCE
)
1455 dst
= ps_get_ts_dst(&ps
, key
->ts
[s
].resultarg
? D3DTA_TEMP
: D3DTA_CURRENT
);
/* Stage following a PREMODULATE: rMod = current * texture. */
1457 if (ps
.stage
.index_pre_mod
== ps
.stage
.index
) {
1458 ps
.rMod
= ureg_DECL_temporary(ureg
);
1459 ureg_MUL(ureg
, ps
.rMod
, ps
.rCurSrc
, ps
.rTexSrc
);
/* Reassemble the full argument selectors: the low bits come from the
 * per-stage key fields, bit 4 (and bit 5 for color arguments) from the
 * per-argument bitmasks that hold one bit per stage. */
1462 colorarg
[0] = (key
->ts
[s
].colorarg0
| ((key
->colorarg_b4
[0] >> s
) << 4) | ((key
->colorarg_b5
[0] >> s
) << 5)) & 0x3f;
1463 colorarg
[1] = (key
->ts
[s
].colorarg1
| ((key
->colorarg_b4
[1] >> s
) << 4) | ((key
->colorarg_b5
[1] >> s
) << 5)) & 0x3f;
1464 colorarg
[2] = (key
->ts
[s
].colorarg2
| ((key
->colorarg_b4
[2] >> s
) << 4) | ((key
->colorarg_b5
[2] >> s
) << 5)) & 0x3f;
1465 alphaarg
[0] = (key
->ts
[s
].alphaarg0
| ((key
->alphaarg_b4
[0] >> s
) << 4)) & 0x1f;
1466 alphaarg
[1] = (key
->ts
[s
].alphaarg1
| ((key
->alphaarg_b4
[1] >> s
) << 4)) & 0x1f;
1467 alphaarg
[2] = (key
->ts
[s
].alphaarg2
| ((key
->alphaarg_b4
[2] >> s
) << 4)) & 0x1f;
/* If color and alpha differ in op or in any argument, the color op is
 * restricted to xyz here and the alpha op is emitted separately below. */
1469 if (key
->ts
[s
].colorop
!= key
->ts
[s
].alphaop
||
1470 colorarg
[0] != alphaarg
[0] ||
1471 colorarg
[1] != alphaarg
[1] ||
1472 colorarg
[2] != alphaarg
[2])
1473 dst
.WriteMask
= TGSI_WRITEMASK_XYZ
;
1475 /* Special DOTPRODUCT behaviour (see wine tests) */
1476 if (key
->ts
[s
].colorop
== D3DTOP_DOTPRODUCT3
)
1477 dst
.WriteMask
= TGSI_WRITEMASK_XYZW
;
1479 if (used_c
& 0x1) arg
[0] = ps_get_ts_arg(&ps
, colorarg
[0]);
1480 if (used_c
& 0x2) arg
[1] = ps_get_ts_arg(&ps
, colorarg
[1]);
1481 if (used_c
& 0x4) arg
[2] = ps_get_ts_arg(&ps
, colorarg
[2]);
1482 ps_do_ts_op(&ps
, key
->ts
[s
].colorop
, dst
, arg
);
/* Separate alpha op, writing only w. */
1484 if (dst
.WriteMask
!= TGSI_WRITEMASK_XYZW
) {
1485 dst
.WriteMask
= TGSI_WRITEMASK_W
;
1487 if (used_a
& 0x1) arg
[0] = ps_get_ts_arg(&ps
, alphaarg
[0]);
1488 if (used_a
& 0x2) arg
[1] = ps_get_ts_arg(&ps
, alphaarg
[1]);
1489 if (used_a
& 0x4) arg
[2] = ps_get_ts_arg(&ps
, alphaarg
[2]);
1490 ps_do_ts_op(&ps
, key
->ts
[s
].alphaop
, dst
, arg
);
/* Add the COLOR1 (specular) input to the rgb of current. */
1495 ureg_ADD(ureg
, ureg_writemask(ps
.rCur
, TGSI_WRITEMASK_XYZ
), ps
.rCurSrc
, ps
.vC
[1]);
/* Pixel fog: the fog factor is computed into rTmp.x from the fragment
 * position, using CONST[22] (end, 1/(end-start), density) and blended with
 * the fog color in CONST[21]. */
1499 if (key
->fog_mode
) {
1500 struct ureg_dst rFog
= ureg_writemask(ps
.rTmp
, TGSI_WRITEMASK_X
);
1501 struct ureg_src vPos
;
1502 if (device
->screen
->get_param(device
->screen
,
1503 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL
)) {
1504 vPos
= ureg_DECL_system_value(ureg
, TGSI_SEMANTIC_POSITION
, 0);
1506 vPos
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_POSITION
, 0,
1507 TGSI_INTERPOLATE_LINEAR
);
1510 /* Source is either W or Z.
1511 * When we use vs ff,
1512 * Z is when an orthogonal projection matrix is detected,
1514 * Z is used for programmable vs.
1515 * Note: Tests indicate that the projection matrix coefficients do
1516 * actually affect pixel fog (and not vertex fog) when vs ff is used,
1517 * which justifies taking the position's w instead of taking the z coordinate
1518 * before the projection in the vs shader.
1520 if (!key
->fog_source
)
1521 ureg_MOV(ureg
, rFog
, _ZZZZ(vPos
));
1523 /* Position's w is 1/w */
1524 ureg_RCP(ureg
, rFog
, _WWWW(vPos
));
/* factor = exp(-density * d), written as ex2(-log2(e) * density * d) */
1526 if (key
->fog_mode
== D3DFOG_EXP
) {
1527 ureg_MUL(ureg
, rFog
, _X(rFog
), _ZZZZ(_CONST(22)));
1528 ureg_MUL(ureg
, rFog
, _X(rFog
), ureg_imm1f(ureg
, -1.442695f
));
1529 ureg_EX2(ureg
, rFog
, _X(rFog
));
/* factor = exp(-(density * d)^2) */
1531 if (key
->fog_mode
== D3DFOG_EXP2
) {
1532 ureg_MUL(ureg
, rFog
, _X(rFog
), _ZZZZ(_CONST(22)));
1533 ureg_MUL(ureg
, rFog
, _X(rFog
), _X(rFog
));
1534 ureg_MUL(ureg
, rFog
, _X(rFog
), ureg_imm1f(ureg
, -1.442695f
));
1535 ureg_EX2(ureg
, rFog
, _X(rFog
));
/* factor = saturate((end - d) * 1/(end - start)) */
1537 if (key
->fog_mode
== D3DFOG_LINEAR
) {
1538 ureg_ADD(ureg
, rFog
, _XXXX(_CONST(22)), ureg_negate(_X(rFog
)));
1539 ureg_MUL(ureg
, ureg_saturate(rFog
), _X(rFog
), _YYYY(_CONST(22)));
/* rgb = lerp(fog color, current, factor); alpha is passed through. */
1541 ureg_LRP(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_XYZ
), _X(rFog
), ps
.rCurSrc
, _CONST(21));
1542 ureg_MOV(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_W
), ps
.rCurSrc
);
/* Fog factor supplied by the vertex stage through the FOG input. */
1545 struct ureg_src vFog
= ureg_DECL_fs_input(ureg
, TGSI_SEMANTIC_FOG
, 0, TGSI_INTERPOLATE_PERSPECTIVE
);
1546 ureg_LRP(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_XYZ
), _XXXX(vFog
), ps
.rCurSrc
, _CONST(21));
1547 ureg_MOV(ureg
, ureg_writemask(oCol
, TGSI_WRITEMASK_W
), ps
.rCurSrc
);
/* No fog: output current unchanged. */
1549 ureg_MOV(ureg
, oCol
, ps
.rCurSrc
);
1553 nine_ureg_tgsi_dump(ureg
, FALSE
);
1554 return ureg_create_shader_and_destroy(ureg
, device
->context
.pipe
);
/* Return the fixed-function vertex shader that matches the device's current
 * state, building and caching it when needed.
 *
 * The function fills a nine_ff_vs_key from the bound vertex declaration
 * (which usages are present, texture coordinate dimensions, pass-through
 * elements) and from render / texture-stage states (lighting, material
 * sources, fog, vertex blending, texcoord generation). The key is looked up
 * in device->ff.ht_vs; on the build path a new NineVertexShader9 is created
 * from nine_ff_build_vs(), the key is copied into it, it is inserted in the
 * hash table, and its input map / position_t / point_size are filled in.
 *
 * NOTE(review): the early return on a cache hit, the error handling after
 * NineVertexShader9_new and the final return are not visible here. */
1557 static struct NineVertexShader9
*
1558 nine_ff_get_vs(struct NineDevice9
*device
)
1560 const struct nine_context
*context
= &device
->context
;
1561 struct NineVertexShader9
*vs
;
1562 enum pipe_error err
;
1563 struct vs_build_ctx bld
;
1564 struct nine_ff_vs_key key
;
1566 boolean has_indexes
= false;
1567 boolean has_weights
= false;
1568 char input_texture_coord
[8];
1570 assert(sizeof(key
) <= sizeof(key
.value32
));
/* The key is hashed/compared as raw memory, so start from all zeroes. */
1572 memset(&key
, 0, sizeof(key
));
1573 memset(&bld
, 0, sizeof(bld
));
1574 memset(&input_texture_coord
, 0, sizeof(input_texture_coord
));
1578 /* FIXME: this shouldn't be NULL, but it is on init */
1579 if (context
->vdecl
) {
/* Assume both vertex colors are absent until the declaration says
 * otherwise. */
1580 key
.color0in_one
= 1;
1581 key
.color1in_zero
= 1;
1582 for (i
= 0; i
< context
->vdecl
->nelems
; i
++) {
1583 uint16_t usage
= context
->vdecl
->usage_map
[i
];
1584 if (usage
== NINE_DECLUSAGE_POSITIONT
)
1586 else if (usage
== NINE_DECLUSAGE_i(COLOR
, 0))
1587 key
.color0in_one
= 0;
1588 else if (usage
== NINE_DECLUSAGE_i(COLOR
, 1))
1589 key
.color1in_zero
= 0;
1590 else if (usage
== NINE_DECLUSAGE_i(BLENDINDICES
, 0)) {
1592 key
.passthrough
|= 1 << usage
;
1593 } else if (usage
== NINE_DECLUSAGE_i(BLENDWEIGHT
, 0)) {
1595 key
.passthrough
|= 1 << usage
;
1596 } else if (usage
== NINE_DECLUSAGE_i(NORMAL
, 0)) {
1598 key
.passthrough
|= 1 << usage
;
1599 } else if (usage
== NINE_DECLUSAGE_PSIZE
)
1600 key
.vertexpointsize
= 1;
/* Texture coordinate element: usage encodes index * COUNT + TEXCOORD;
 * remember the dimension of the input for that coordinate set. */
1601 else if (usage
% NINE_DECLUSAGE_COUNT
== NINE_DECLUSAGE_TEXCOORD
) {
1602 s
= usage
/ NINE_DECLUSAGE_COUNT
;
1604 input_texture_coord
[s
] = nine_decltype_get_dim(context
->vdecl
->decls
[i
].Type
);
1606 DBG("FF given texture coordinate >= 8. Ignoring\n");
1607 } else if (usage
< NINE_DECLUSAGE_NONE
)
1608 key
.passthrough
|= 1 << usage
;
1611 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1612 * We do restrict to indices 0 */
1613 key
.passthrough
&= ~((1 << NINE_DECLUSAGE_POSITION
) | (1 << NINE_DECLUSAGE_PSIZE
) |
1614 (1 << NINE_DECLUSAGE_TEXCOORD
) | (1 << NINE_DECLUSAGE_POSITIONT
) |
1615 (1 << NINE_DECLUSAGE_TESSFACTOR
) | (1 << NINE_DECLUSAGE_SAMPLE
));
/* Pass-through is only kept for pre-transformed (POSITIONT) vertices. */
1616 if (!key
.position_t
)
1617 key
.passthrough
= 0;
1618 key
.pointscale
= !!context
->rs
[D3DRS_POINTSCALEENABLE
];
/* lighting: enabled with at least one active light; darkness: enabled with
 * none. */
1620 key
.lighting
= !!context
->rs
[D3DRS_LIGHTING
] && context
->ff
.num_lights_active
;
1621 key
.darkness
= !!context
->rs
[D3DRS_LIGHTING
] && !context
->ff
.num_lights_active
;
1622 if (key
.position_t
) {
1623 key
.darkness
= 0; /* |= key.lighting; */ /* XXX ? */
/* Material sources may only select a vertex color that is actually present
 * in the declaration: bit 0 = color 0 available, bit 1 = color 1 available. */
1626 if ((key
.lighting
| key
.darkness
) && context
->rs
[D3DRS_COLORVERTEX
]) {
1627 uint32_t mask
= (key
.color0in_one
? 0 : 1) | (key
.color1in_zero
? 0 : 2);
1628 key
.mtl_diffuse
= context
->rs
[D3DRS_DIFFUSEMATERIALSOURCE
] & mask
;
1629 key
.mtl_ambient
= context
->rs
[D3DRS_AMBIENTMATERIALSOURCE
] & mask
;
1630 key
.mtl_specular
= context
->rs
[D3DRS_SPECULARMATERIALSOURCE
] & mask
;
1631 key
.mtl_emissive
= context
->rs
[D3DRS_EMISSIVEMATERIALSOURCE
] & mask
;
1633 key
.fog
= !!context
->rs
[D3DRS_FOGENABLE
];
/* Vertex fog mode is only used for non pre-transformed vertices with fog
 * enabled. */
1634 key
.fog_mode
= (!key
.position_t
&& context
->rs
[D3DRS_FOGENABLE
]) ? context
->rs
[D3DRS_FOGVERTEXMODE
] : 0;
/* NOTE(review): fog_range is a 1-bit field but, unlike the neighbouring
 * flags, the render state is stored without "!!"; any value other than 0/1
 * would be truncated to its lowest bit. Confirm the render state is already
 * normalised where it is set. */
1636 key
.fog_range
= context
->rs
[D3DRS_RANGEFOGENABLE
];
1638 key
.localviewer
= !!context
->rs
[D3DRS_LOCALVIEWER
];
1639 key
.normalizenormals
= !!context
->rs
[D3DRS_NORMALIZENORMALS
];
1640 key
.ucp
= !!context
->rs
[D3DRS_CLIPPLANEENABLE
];
1642 if (context
->rs
[D3DRS_VERTEXBLEND
] != D3DVBF_DISABLE
) {
/* Indexed blending needs both the render state and blend indices in the
 * vertex declaration. */
1643 key
.vertexblend_indexed
= !!context
->rs
[D3DRS_INDEXEDVERTEXBLENDENABLE
] && has_indexes
;
/* key.vertexblend stores the number of matrices blended (weights + 1). */
1645 switch (context
->rs
[D3DRS_VERTEXBLEND
]) {
1646 case D3DVBF_0WEIGHTS
: key
.vertexblend
= key
.vertexblend_indexed
; break;
1647 case D3DVBF_1WEIGHTS
: key
.vertexblend
= 2; break;
1648 case D3DVBF_2WEIGHTS
: key
.vertexblend
= 3; break;
1649 case D3DVBF_3WEIGHTS
: key
.vertexblend
= 4; break;
1650 case D3DVBF_TWEENING
: key
.vertextween
= 1; break;
1652 assert(!"invalid D3DVBF");
1655 if (!has_weights
&& context
->rs
[D3DRS_VERTEXBLEND
] != D3DVBF_0WEIGHTS
)
1656 key
.vertexblend
= 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
/* Per-stage texture coordinate setup: generation mode, source coordinate
 * index, and input/output dimensions are packed a few bits per stage. */
1659 for (s
= 0; s
< 8; ++s
) {
/* High 16 bits of TEXCOORDINDEX select the generation mode (stored +1),
 * low 3 bits the coordinate set index. */
1660 unsigned gen
= (context
->ff
.tex_stage
[s
][D3DTSS_TEXCOORDINDEX
] >> 16) + 1;
1661 unsigned idx
= context
->ff
.tex_stage
[s
][D3DTSS_TEXCOORDINDEX
] & 7;
/* Pre-transformed vertices cannot use generated coordinates. */
1664 if (key
.position_t
&& gen
> NINED3DTSS_TCI_PASSTHRU
)
1665 gen
= NINED3DTSS_TCI_PASSTHRU
;
/* Pass-through of a coordinate set the declaration does not provide is
 * disabled. */
1667 if (!input_texture_coord
[idx
] && gen
== NINED3DTSS_TCI_PASSTHRU
)
1668 gen
= NINED3DTSS_TCI_DISABLE
;
1670 key
.tc_gen
|= gen
<< (s
* 3);
1671 key
.tc_idx
|= idx
<< (s
* 3);
1672 key
.tc_dim_input
|= ((input_texture_coord
[idx
]-1) & 0x3) << (s
* 2);
1674 dim
= context
->ff
.tex_stage
[s
][D3DTSS_TEXTURETRANSFORMFLAGS
] & 0x7;
1676 dim
= input_texture_coord
[idx
];
1677 if (dim
== 1) /* NV behaviour */
1679 key
.tc_dim_output
|= dim
<< (s
* 3);
/* Look the key up in the cache of fixed-function vertex shaders. */
1682 vs
= util_hash_table_get(device
->ff
.ht_vs
, &key
);
/* Build path: create the shader object from freshly generated code. */
1685 NineVertexShader9_new(device
, &vs
, NULL
, nine_ff_build_vs(device
, &bld
));
1687 nine_ff_prune_vs(device
);
/* The hash table is keyed by the copy of the key stored inside the shader
 * object, not by the local variable. */
1691 memcpy(&vs
->ff_key
, &key
, sizeof(vs
->ff_key
));
1693 err
= util_hash_table_set(device
->ff
.ht_vs
, &vs
->ff_key
, vs
);
1695 assert(err
== PIPE_OK
);
1696 device
->ff
.num_vs
++;
1697 NineUnknown_ConvertRefToBind(NineUnknown(vs
));
/* Publish which declaration usages the generated shader reads. */
1699 vs
->num_inputs
= bld
.num_inputs
;
1700 for (n
= 0; n
< bld
.num_inputs
; ++n
)
1701 vs
->input_map
[n
].ndecl
= bld
.input
[n
];
1703 vs
->position_t
= key
.position_t
;
1704 vs
->point_size
= key
.vertexpointsize
| key
.pointscale
;
1709 #define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
1710 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1712 static struct NinePixelShader9
*
1713 nine_ff_get_ps(struct NineDevice9
*device
)
1715 struct nine_context
*context
= &device
->context
;
1716 D3DMATRIX
*projection_matrix
= GET_D3DTS(PROJECTION
);
1717 struct NinePixelShader9
*ps
;
1718 enum pipe_error err
;
1719 struct nine_ff_ps_key key
;
1721 uint8_t sampler_mask
= 0;
1723 assert(sizeof(key
) <= sizeof(key
.value32
));
1725 memset(&key
, 0, sizeof(key
));
1726 for (s
= 0; s
< 8; ++s
) {
1727 key
.ts
[s
].colorop
= context
->ff
.tex_stage
[s
][D3DTSS_COLOROP
];
1728 key
.ts
[s
].alphaop
= context
->ff
.tex_stage
[s
][D3DTSS_ALPHAOP
];
1729 const uint8_t used_c
= ps_d3dtop_args_mask(key
.ts
[s
].colorop
);
1730 const uint8_t used_a
= ps_d3dtop_args_mask(key
.ts
[s
].alphaop
);
1731 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
1732 * ALPHAOP cannot be enabled if COLOROP is disabled.
1733 * Verified on Windows. */
1734 if (key
.ts
[s
].colorop
== D3DTOP_DISABLE
) {
1735 key
.ts
[s
].alphaop
= D3DTOP_DISABLE
; /* DISABLE == 1, avoid degenerate keys */
1739 if (!context
->texture
[s
].enabled
&&
1740 ((context
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
] == D3DTA_TEXTURE
&&
1742 (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] == D3DTA_TEXTURE
&&
1744 (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
] == D3DTA_TEXTURE
&&
1746 /* Tested on Windows: Invalid texture read disables the stage
1747 * and the subsequent ones, but only for colorop. For alpha,
1748 * it's as if the texture had alpha of 1.0, which is what
1749 * has our dummy texture in that case. Invalid color also
1750 * disabled the following alpha stages. */
1751 key
.ts
[s
].colorop
= key
.ts
[s
].alphaop
= D3DTOP_DISABLE
;
1755 if (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
] == D3DTA_TEXTURE
||
1756 context
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] == D3DTA_TEXTURE
||
1757 context
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
] == D3DTA_TEXTURE
||
1758 context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG0
] == D3DTA_TEXTURE
||
1759 context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG1
] == D3DTA_TEXTURE
||
1760 context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG2
] == D3DTA_TEXTURE
)
1761 sampler_mask
|= (1 << s
);
1763 if (key
.ts
[s
].colorop
!= D3DTOP_DISABLE
) {
1764 if (used_c
& 0x1) key
.ts
[s
].colorarg0
= context
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
];
1765 if (used_c
& 0x2) key
.ts
[s
].colorarg1
= context
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
];
1766 if (used_c
& 0x4) key
.ts
[s
].colorarg2
= context
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
];
1767 if (used_c
& 0x1) key
.colorarg_b4
[0] |= (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
] >> 4) << s
;
1768 if (used_c
& 0x1) key
.colorarg_b5
[0] |= (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG0
] >> 5) << s
;
1769 if (used_c
& 0x2) key
.colorarg_b4
[1] |= (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] >> 4) << s
;
1770 if (used_c
& 0x2) key
.colorarg_b5
[1] |= (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG1
] >> 5) << s
;
1771 if (used_c
& 0x4) key
.colorarg_b4
[2] |= (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
] >> 4) << s
;
1772 if (used_c
& 0x4) key
.colorarg_b5
[2] |= (context
->ff
.tex_stage
[s
][D3DTSS_COLORARG2
] >> 5) << s
;
1774 if (key
.ts
[s
].alphaop
!= D3DTOP_DISABLE
) {
1775 if (used_a
& 0x1) key
.ts
[s
].alphaarg0
= context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG0
];
1776 if (used_a
& 0x2) key
.ts
[s
].alphaarg1
= context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG1
];
1777 if (used_a
& 0x4) key
.ts
[s
].alphaarg2
= context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG2
];
1778 if (used_a
& 0x1) key
.alphaarg_b4
[0] |= (context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG0
] >> 4) << s
;
1779 if (used_a
& 0x2) key
.alphaarg_b4
[1] |= (context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG1
] >> 4) << s
;
1780 if (used_a
& 0x4) key
.alphaarg_b4
[2] |= (context
->ff
.tex_stage
[s
][D3DTSS_ALPHAARG2
] >> 4) << s
;
1782 key
.ts
[s
].resultarg
= context
->ff
.tex_stage
[s
][D3DTSS_RESULTARG
] == D3DTA_TEMP
;
1784 if (context
->texture
[s
].enabled
) {
1785 switch (context
->texture
[s
].type
) {
1786 case D3DRTYPE_TEXTURE
: key
.ts
[s
].textarget
= 1; break;
1787 case D3DRTYPE_VOLUMETEXTURE
: key
.ts
[s
].textarget
= 2; break;
1788 case D3DRTYPE_CUBETEXTURE
: key
.ts
[s
].textarget
= 3; break;
1790 assert(!"unexpected texture type");
1794 key
.ts
[s
].textarget
= 1;
1798 /* Note: If colorop is D3DTOP_DISABLE for the first stage
1799 * (which implies alphaop is too), nothing particular happens,
1800 * that is, current is equal to diffuse (which is the case anyway,
1801 * because it is how it is initialized).
1802 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
1803 * because then if the resultarg is TEMP, then diffuse alpha is written
1805 if (key
.ts
[0].colorop
!= D3DTOP_DISABLE
&&
1806 key
.ts
[0].alphaop
== D3DTOP_DISABLE
&&
1807 key
.ts
[0].resultarg
!= 0) {
1808 key
.ts
[0].alphaop
= D3DTOP_SELECTARG1
;
1809 key
.ts
[0].alphaarg1
= D3DTA_DIFFUSE
;
1811 /* When no alpha stage writes to current, diffuse alpha is taken.
1812 * Since we initialize current to diffuse, we have the behaviour. */
1814 /* Last stage always writes to Current */
1816 key
.ts
[s
-1].resultarg
= 0;
1818 key
.projected
= nine_ff_get_projected_key(context
);
1819 key
.specular
= !!context
->rs
[D3DRS_SPECULARENABLE
];
1822 key
.ts
[s
].colorop
= key
.ts
[s
].alphaop
= D3DTOP_DISABLE
;
1823 if (context
->rs
[D3DRS_FOGENABLE
])
1824 key
.fog_mode
= context
->rs
[D3DRS_FOGTABLEMODE
];
1825 key
.fog
= !!context
->rs
[D3DRS_FOGENABLE
];
1826 /* Pixel fog (with WFOG advertised): source is either Z or W.
1827 * W is the source if vs ff is used, and the
1828 * projection matrix is not orthogonal.
1829 * Tests on Win 10 seem to indicate _34
1830 * and _44 are checked against 0, 1. */
1831 if (key
.fog_mode
&& key
.fog
)
1832 key
.fog_source
= !context
->programmable_vs
&&
1833 !(projection_matrix
->_34
== 0.0f
&&
1834 projection_matrix
->_44
== 1.0f
);
1836 ps
= util_hash_table_get(device
->ff
.ht_ps
, &key
);
1839 NinePixelShader9_new(device
, &ps
, NULL
, nine_ff_build_ps(device
, &key
));
1841 nine_ff_prune_ps(device
);
1843 memcpy(&ps
->ff_key
, &key
, sizeof(ps
->ff_key
));
1845 err
= util_hash_table_set(device
->ff
.ht_ps
, &ps
->ff_key
, ps
);
1847 assert(err
== PIPE_OK
);
1848 device
->ff
.num_ps
++;
1849 NineUnknown_ConvertRefToBind(NineUnknown(ps
));
1852 ps
->sampler_mask
= sampler_mask
;
1858 nine_ff_load_vs_transforms(struct NineDevice9
*device
)
1860 struct nine_context
*context
= &device
->context
;
1862 D3DMATRIX
*M
= (D3DMATRIX
*)device
->ff
.vs_const
;
1865 /* TODO: make this nicer, and only upload the ones we need */
1866 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1868 if (IS_D3DTS_DIRTY(context
, WORLD
) ||
1869 IS_D3DTS_DIRTY(context
, VIEW
) ||
1870 IS_D3DTS_DIRTY(context
, PROJECTION
)) {
1871 /* WVP, WV matrices */
1872 nine_d3d_matrix_matrix_mul(&M
[1], GET_D3DTS(WORLD
), GET_D3DTS(VIEW
));
1873 nine_d3d_matrix_matrix_mul(&M
[0], &M
[1], GET_D3DTS(PROJECTION
));
1875 /* normal matrix == transpose(inverse(WV)) */
1876 nine_d3d_matrix_inverse(&T
, &M
[1]);
1877 nine_d3d_matrix_transpose(&M
[4], &T
);
1880 M
[2] = *GET_D3DTS(PROJECTION
);
1882 /* V and W matrix */
1883 nine_d3d_matrix_inverse(&M
[3], GET_D3DTS(VIEW
));
1887 if (context
->rs
[D3DRS_VERTEXBLEND
] != D3DVBF_DISABLE
) {
1888 /* load other world matrices */
1889 for (i
= 1; i
<= 8; ++i
) {
1890 nine_d3d_matrix_matrix_mul(&M
[40 + i
], GET_D3DTS(WORLDMATRIX(i
)), GET_D3DTS(VIEW
));
1894 device
->ff
.vs_const
[30 * 4] = asfloat(context
->rs
[D3DRS_TWEENFACTOR
]);
1898 nine_ff_load_lights(struct NineDevice9
*device
)
1900 struct nine_context
*context
= &device
->context
;
1901 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.vs_const
;
1904 if (context
->changed
.group
& NINE_STATE_FF_MATERIAL
) {
1905 const D3DMATERIAL9
*mtl
= &context
->ff
.material
;
1907 memcpy(&dst
[20], &mtl
->Diffuse
, 4 * sizeof(float));
1908 memcpy(&dst
[21], &mtl
->Ambient
, 4 * sizeof(float));
1909 memcpy(&dst
[22], &mtl
->Specular
, 4 * sizeof(float));
1910 dst
[23].x
= mtl
->Power
;
1911 memcpy(&dst
[24], &mtl
->Emissive
, 4 * sizeof(float));
1912 d3dcolor_to_rgba(&dst
[25].x
, context
->rs
[D3DRS_AMBIENT
]);
1913 dst
[19].x
= dst
[25].x
* mtl
->Ambient
.r
+ mtl
->Emissive
.r
;
1914 dst
[19].y
= dst
[25].y
* mtl
->Ambient
.g
+ mtl
->Emissive
.g
;
1915 dst
[19].z
= dst
[25].z
* mtl
->Ambient
.b
+ mtl
->Emissive
.b
;
1918 if (!(context
->changed
.group
& NINE_STATE_FF_LIGHTING
))
1921 for (l
= 0; l
< context
->ff
.num_lights_active
; ++l
) {
1922 const D3DLIGHT9
*light
= &context
->ff
.light
[context
->ff
.active_light
[l
]];
1924 dst
[32 + l
* 8].x
= light
->Type
;
1925 dst
[32 + l
* 8].y
= light
->Attenuation0
;
1926 dst
[32 + l
* 8].z
= light
->Attenuation1
;
1927 dst
[32 + l
* 8].w
= light
->Attenuation2
;
1928 memcpy(&dst
[33 + l
* 8].x
, &light
->Diffuse
, sizeof(light
->Diffuse
));
1929 memcpy(&dst
[34 + l
* 8].x
, &light
->Specular
, sizeof(light
->Specular
));
1930 memcpy(&dst
[35 + l
* 8].x
, &light
->Ambient
, sizeof(light
->Ambient
));
1931 nine_d3d_vector4_matrix_mul((D3DVECTOR
*)&dst
[36 + l
* 8].x
, &light
->Position
, GET_D3DTS(VIEW
));
1932 nine_d3d_vector3_matrix_mul((D3DVECTOR
*)&dst
[37 + l
* 8].x
, &light
->Direction
, GET_D3DTS(VIEW
));
1933 dst
[36 + l
* 8].w
= light
->Type
== D3DLIGHT_DIRECTIONAL
? 1e9f
: light
->Range
;
1934 dst
[37 + l
* 8].w
= light
->Falloff
;
1935 dst
[38 + l
* 8].x
= cosf(light
->Theta
* 0.5f
);
1936 dst
[38 + l
* 8].y
= cosf(light
->Phi
* 0.5f
);
1937 dst
[38 + l
* 8].z
= 1.0f
/ (dst
[38 + l
* 8].x
- dst
[38 + l
* 8].y
);
1938 dst
[39 + l
* 8].w
= (l
+ 1) == context
->ff
.num_lights_active
;
1943 nine_ff_load_point_and_fog_params(struct NineDevice9
*device
)
1945 struct nine_context
*context
= &device
->context
;
1946 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.vs_const
;
1948 if (!(context
->changed
.group
& NINE_STATE_FF_OTHER
))
1950 dst
[26].x
= asfloat(context
->rs
[D3DRS_POINTSIZE_MIN
]);
1951 dst
[26].y
= asfloat(context
->rs
[D3DRS_POINTSIZE_MAX
]);
1952 dst
[26].z
= asfloat(context
->rs
[D3DRS_POINTSIZE
]);
1953 dst
[26].w
= asfloat(context
->rs
[D3DRS_POINTSCALE_A
]);
1954 dst
[27].x
= asfloat(context
->rs
[D3DRS_POINTSCALE_B
]);
1955 dst
[27].y
= asfloat(context
->rs
[D3DRS_POINTSCALE_C
]);
1956 dst
[28].x
= asfloat(context
->rs
[D3DRS_FOGEND
]);
1957 dst
[28].y
= 1.0f
/ (asfloat(context
->rs
[D3DRS_FOGEND
]) - asfloat(context
->rs
[D3DRS_FOGSTART
]));
1958 if (isinf(dst
[28].y
))
1960 dst
[28].z
= asfloat(context
->rs
[D3DRS_FOGDENSITY
]);
1964 nine_ff_load_tex_matrices(struct NineDevice9
*device
)
1966 struct nine_context
*context
= &device
->context
;
1967 D3DMATRIX
*M
= (D3DMATRIX
*)device
->ff
.vs_const
;
1970 if (!(context
->ff
.changed
.transform
[0] & 0xff0000))
1972 for (s
= 0; s
< 8; ++s
) {
1973 if (IS_D3DTS_DIRTY(context
, TEXTURE0
+ s
))
1974 nine_d3d_matrix_transpose(&M
[32 + s
], nine_state_access_transform(&context
->ff
, D3DTS_TEXTURE0
+ s
, FALSE
));
1979 nine_ff_load_ps_params(struct NineDevice9
*device
)
1981 struct nine_context
*context
= &device
->context
;
1982 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.ps_const
;
1985 if (!(context
->changed
.group
& (NINE_STATE_FF_PSSTAGES
| NINE_STATE_FF_OTHER
)))
1988 for (s
= 0; s
< 8; ++s
)
1989 d3dcolor_to_rgba(&dst
[s
].x
, context
->ff
.tex_stage
[s
][D3DTSS_CONSTANT
]);
1991 for (s
= 0; s
< 8; ++s
) {
1992 dst
[8 + s
].x
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT00
]);
1993 dst
[8 + s
].y
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT01
]);
1994 dst
[8 + s
].z
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT10
]);
1995 dst
[8 + s
].w
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVMAT11
]);
1997 dst
[16 + s
/ 2].z
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLSCALE
]);
1998 dst
[16 + s
/ 2].w
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLOFFSET
]);
2000 dst
[16 + s
/ 2].x
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLSCALE
]);
2001 dst
[16 + s
/ 2].y
= asfloat(context
->ff
.tex_stage
[s
][D3DTSS_BUMPENVLOFFSET
]);
2005 d3dcolor_to_rgba(&dst
[20].x
, context
->rs
[D3DRS_TEXTUREFACTOR
]);
2006 d3dcolor_to_rgba(&dst
[21].x
, context
->rs
[D3DRS_FOGCOLOR
]);
2007 dst
[22].x
= asfloat(context
->rs
[D3DRS_FOGEND
]);
2008 dst
[22].y
= 1.0f
/ (asfloat(context
->rs
[D3DRS_FOGEND
]) - asfloat(context
->rs
[D3DRS_FOGSTART
]));
2009 dst
[22].z
= asfloat(context
->rs
[D3DRS_FOGDENSITY
]);
2013 nine_ff_load_viewport_info(struct NineDevice9
*device
)
2015 D3DVIEWPORT9
*viewport
= &device
->context
.viewport
;
2016 struct fvec4
*dst
= (struct fvec4
*)device
->ff
.vs_const
;
2017 float diffZ
= viewport
->MaxZ
- viewport
->MinZ
;
2019 /* Note: the other functions avoids to fill the const again if nothing changed.
2020 * But we don't have much to fill, and adding code to allow that may be complex
2021 * so just fill it always */
2022 dst
[100].x
= 2.0f
/ (float)(viewport
->Width
);
2023 dst
[100].y
= 2.0f
/ (float)(viewport
->Height
);
2024 dst
[100].z
= (diffZ
== 0.0f
) ? 0.0f
: (1.0f
/ diffZ
);
2025 dst
[100].w
= (float)(viewport
->Width
);
2026 dst
[101].x
= (float)(viewport
->X
);
2027 dst
[101].y
= (float)(viewport
->Y
);
2028 dst
[101].z
= (float)(viewport
->MinZ
);
2032 nine_ff_update(struct NineDevice9
*device
)
2034 struct nine_context
*context
= &device
->context
;
2035 struct pipe_constant_buffer cb
;
2037 DBG("vs=%p ps=%p\n", context
->vs
, context
->ps
);
2039 /* NOTE: the only reference belongs to the hash table */
2040 if (!context
->programmable_vs
) {
2041 device
->ff
.vs
= nine_ff_get_vs(device
);
2042 context
->changed
.group
|= NINE_STATE_VS
;
2045 device
->ff
.ps
= nine_ff_get_ps(device
);
2046 context
->changed
.group
|= NINE_STATE_PS
;
2049 if (!context
->programmable_vs
) {
2050 nine_ff_load_vs_transforms(device
);
2051 nine_ff_load_tex_matrices(device
);
2052 nine_ff_load_lights(device
);
2053 nine_ff_load_point_and_fog_params(device
);
2054 nine_ff_load_viewport_info(device
);
2056 memset(context
->ff
.changed
.transform
, 0, sizeof(context
->ff
.changed
.transform
));
2058 cb
.buffer_offset
= 0;
2060 cb
.user_buffer
= device
->ff
.vs_const
;
2061 cb
.buffer_size
= NINE_FF_NUM_VS_CONST
* 4 * sizeof(float);
2063 if (!device
->driver_caps
.user_cbufs
) {
2064 context
->pipe_data
.cb_vs_ff
.buffer_size
= cb
.buffer_size
;
2065 u_upload_data(device
->context
.pipe
->const_uploader
,
2068 device
->constbuf_alignment
,
2070 &context
->pipe_data
.cb_vs_ff
.buffer_offset
,
2071 &context
->pipe_data
.cb_vs_ff
.buffer
);
2072 u_upload_unmap(device
->context
.pipe
->const_uploader
);
2073 context
->pipe_data
.cb_vs_ff
.user_buffer
= NULL
;
2075 context
->pipe_data
.cb_vs_ff
= cb
;
2076 context
->commit
|= NINE_STATE_COMMIT_CONST_VS
;
2080 nine_ff_load_ps_params(device
);
2082 cb
.buffer_offset
= 0;
2084 cb
.user_buffer
= device
->ff
.ps_const
;
2085 cb
.buffer_size
= NINE_FF_NUM_PS_CONST
* 4 * sizeof(float);
2087 if (!device
->driver_caps
.user_cbufs
) {
2088 context
->pipe_data
.cb_ps_ff
.buffer_size
= cb
.buffer_size
;
2089 u_upload_data(device
->context
.pipe
->const_uploader
,
2092 device
->constbuf_alignment
,
2094 &context
->pipe_data
.cb_ps_ff
.buffer_offset
,
2095 &context
->pipe_data
.cb_ps_ff
.buffer
);
2096 u_upload_unmap(device
->context
.pipe
->const_uploader
);
2097 context
->pipe_data
.cb_ps_ff
.user_buffer
= NULL
;
2099 context
->pipe_data
.cb_ps_ff
= cb
;
2100 context
->commit
|= NINE_STATE_COMMIT_CONST_PS
;
2103 context
->changed
.group
&= ~NINE_STATE_FF
;
2108 nine_ff_init(struct NineDevice9
*device
)
2110 device
->ff
.ht_vs
= util_hash_table_create(nine_ff_vs_key_hash
,
2111 nine_ff_vs_key_comp
);
2112 device
->ff
.ht_ps
= util_hash_table_create(nine_ff_ps_key_hash
,
2113 nine_ff_ps_key_comp
);
2115 device
->ff
.ht_fvf
= util_hash_table_create(nine_ff_fvf_key_hash
,
2116 nine_ff_fvf_key_comp
);
2118 device
->ff
.vs_const
= CALLOC(NINE_FF_NUM_VS_CONST
, 4 * sizeof(float));
2119 device
->ff
.ps_const
= CALLOC(NINE_FF_NUM_PS_CONST
, 4 * sizeof(float));
2121 return device
->ff
.ht_vs
&& device
->ff
.ht_ps
&&
2122 device
->ff
.ht_fvf
&&
2123 device
->ff
.vs_const
&& device
->ff
.ps_const
;
2126 static enum pipe_error
nine_ff_ht_delete_cb(void *key
, void *value
, void *data
)
2128 NineUnknown_Unbind(NineUnknown(value
));
2133 nine_ff_fini(struct NineDevice9
*device
)
2135 if (device
->ff
.ht_vs
) {
2136 util_hash_table_foreach(device
->ff
.ht_vs
, nine_ff_ht_delete_cb
, NULL
);
2137 util_hash_table_destroy(device
->ff
.ht_vs
);
2139 if (device
->ff
.ht_ps
) {
2140 util_hash_table_foreach(device
->ff
.ht_ps
, nine_ff_ht_delete_cb
, NULL
);
2141 util_hash_table_destroy(device
->ff
.ht_ps
);
2143 if (device
->ff
.ht_fvf
) {
2144 util_hash_table_foreach(device
->ff
.ht_fvf
, nine_ff_ht_delete_cb
, NULL
);
2145 util_hash_table_destroy(device
->ff
.ht_fvf
);
2147 device
->ff
.vs
= NULL
; /* destroyed by unbinding from hash table */
2148 device
->ff
.ps
= NULL
;
2150 FREE(device
->ff
.vs_const
);
2151 FREE(device
->ff
.ps_const
);
2155 nine_ff_prune_vs(struct NineDevice9
*device
)
2157 struct nine_context
*context
= &device
->context
;
2159 if (device
->ff
.num_vs
> 100) {
2160 /* could destroy the bound one here, so unbind */
2161 context
->pipe
->bind_vs_state(context
->pipe
, NULL
);
2162 util_hash_table_foreach(device
->ff
.ht_vs
, nine_ff_ht_delete_cb
, NULL
);
2163 util_hash_table_clear(device
->ff
.ht_vs
);
2164 device
->ff
.num_vs
= 0;
2165 context
->changed
.group
|= NINE_STATE_VS
;
2169 nine_ff_prune_ps(struct NineDevice9
*device
)
2171 struct nine_context
*context
= &device
->context
;
2173 if (device
->ff
.num_ps
> 100) {
2174 /* could destroy the bound one here, so unbind */
2175 context
->pipe
->bind_fs_state(context
->pipe
, NULL
);
2176 util_hash_table_foreach(device
->ff
.ht_ps
, nine_ff_ht_delete_cb
, NULL
);
2177 util_hash_table_clear(device
->ff
.ht_ps
);
2178 device
->ff
.num_ps
= 0;
2179 context
->changed
.group
|= NINE_STATE_PS
;
2183 /* ========================================================================== */
2185 /* Matrix multiplication:
2187 * in memory: 0 1 2 3 (row major)
2193 * r0 = (r0 * cA) (r0 * cB) . .
2194 * r1 = (r1 * cA) (r1 * cB)
2198 * r: (11) (12) (13) (14)
2199 * (21) (22) (23) (24)
2200 * (31) (32) (33) (34)
2201 * (41) (42) (43) (44)
2209 * t.xyzw = MUL(v.xxxx, r[0]);
2210 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2211 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2212 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2214 * v.x = DP4(v, c[0]);
2215 * v.y = DP4(v, c[1]);
2216 * v.z = DP4(v, c[2]);
2217 * v.w = DP4(v, c[3]) = 1
/* Debug helper: sends the sixteen entries of M to DBG(), passing them in
 * row order (m[0][0..3] first, m[3][0..3] last). */
2222 nine_D3DMATRIX_print(const D3DMATRIX *M)
2224 DBG("\n(%f %f %f %f)\n"
2228 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2229 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2230 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2231 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2236 nine_DP4_row_col(const D3DMATRIX
*A
, int r
, const D3DMATRIX
*B
, int c
)
2238 return A
->m
[r
][0] * B
->m
[0][c
] +
2239 A
->m
[r
][1] * B
->m
[1][c
] +
2240 A
->m
[r
][2] * B
->m
[2][c
] +
2241 A
->m
[r
][3] * B
->m
[3][c
];
/* Dot product of vector v with column c of M. The sum starts with
 * v->x * M->m[0][c].
 * NOTE(review): the remaining terms are not visible in this excerpt; the
 * name suggests a four-component product, presumably with an implicit
 * w of 1 since D3DVECTOR is used - confirm against the full definition. */
2245 nine_DP4_vec_col(const D3DVECTOR
*v
, const D3DMATRIX
*M
, int c
)
2247 return v
->x
* M
->m
[0][c
] +
/* Dot product of vector v with column c of M. The sum starts with
 * v->x * M->m[0][c].
 * NOTE(review): the remaining terms are not visible in this excerpt; the
 * name suggests only rows 0..2 of the column take part - confirm against
 * the full definition. */
2254 nine_DP3_vec_col(const D3DVECTOR
*v
, const D3DMATRIX
*M
, int c
)
2256 return v
->x
* M
->m
[0][c
] +
2262 nine_d3d_matrix_matrix_mul(D3DMATRIX
*D
, const D3DMATRIX
*L
, const D3DMATRIX
*R
)
2264 D
->_11
= nine_DP4_row_col(L
, 0, R
, 0);
2265 D
->_12
= nine_DP4_row_col(L
, 0, R
, 1);
2266 D
->_13
= nine_DP4_row_col(L
, 0, R
, 2);
2267 D
->_14
= nine_DP4_row_col(L
, 0, R
, 3);
2269 D
->_21
= nine_DP4_row_col(L
, 1, R
, 0);
2270 D
->_22
= nine_DP4_row_col(L
, 1, R
, 1);
2271 D
->_23
= nine_DP4_row_col(L
, 1, R
, 2);
2272 D
->_24
= nine_DP4_row_col(L
, 1, R
, 3);
2274 D
->_31
= nine_DP4_row_col(L
, 2, R
, 0);
2275 D
->_32
= nine_DP4_row_col(L
, 2, R
, 1);
2276 D
->_33
= nine_DP4_row_col(L
, 2, R
, 2);
2277 D
->_34
= nine_DP4_row_col(L
, 2, R
, 3);
2279 D
->_41
= nine_DP4_row_col(L
, 3, R
, 0);
2280 D
->_42
= nine_DP4_row_col(L
, 3, R
, 1);
2281 D
->_43
= nine_DP4_row_col(L
, 3, R
, 2);
2282 D
->_44
= nine_DP4_row_col(L
, 3, R
, 3);
2286 nine_d3d_vector4_matrix_mul(D3DVECTOR
*d
, const D3DVECTOR
*v
, const D3DMATRIX
*M
)
2288 d
->x
= nine_DP4_vec_col(v
, M
, 0);
2289 d
->y
= nine_DP4_vec_col(v
, M
, 1);
2290 d
->z
= nine_DP4_vec_col(v
, M
, 2);
2294 nine_d3d_vector3_matrix_mul(D3DVECTOR
*d
, const D3DVECTOR
*v
, const D3DMATRIX
*M
)
2296 d
->x
= nine_DP3_vec_col(v
, M
, 0);
2297 d
->y
= nine_DP3_vec_col(v
, M
, 1);
2298 d
->z
= nine_DP3_vec_col(v
, M
, 2);
2302 nine_d3d_matrix_transpose(D3DMATRIX
*D
, const D3DMATRIX
*M
)
2305 for (i
= 0; i
< 4; ++i
)
2306 for (j
= 0; j
< 4; ++j
)
2307 D
->m
[i
][j
] = M
->m
[j
][i
];
/* Helpers for the determinant below. Each one multiplies one entry from
 * every row of M (row 1 column i, row 2 column j, row 3 column k, row 4
 * column l) and folds the product into the caller's `pos` / `neg`
 * accumulators: _M_ADD_PROD adds the product, _M_SUB_PROD subtracts it.
 * Contributions that end up positive go to `pos`, the others to `neg`. */
2310 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2311 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2312 if (t > 0.0f) pos += t; else neg += t; } while(0)
2314 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2315 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2316 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
/* Determinant of the 4x4 matrix M, expanded over all 24 column
 * permutations: the twelve even permutations are added, the twelve odd ones
 * subtracted.
 * NOTE(review): the accumulator declarations and the return statement are
 * not visible in this excerpt; presumably the result is pos + neg -
 * confirm against the full definition. */
2318 nine_d3d_matrix_det(const D3DMATRIX
*M
)
2323 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2324 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2325 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2327 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2328 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2329 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2331 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2332 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2333 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2335 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2336 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2337 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2339 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2340 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2341 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2343 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2344 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2345 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2347 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2348 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2349 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2351 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2352 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2353 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2358 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2359 * I have no idea where this code came from.
2362 nine_d3d_matrix_inverse(D3DMATRIX
*D
, const D3DMATRIX
*M
)
2368 M
->m
[1][1] * M
->m
[2][2] * M
->m
[3][3] -
2369 M
->m
[1][1] * M
->m
[3][2] * M
->m
[2][3] -
2370 M
->m
[1][2] * M
->m
[2][1] * M
->m
[3][3] +
2371 M
->m
[1][2] * M
->m
[3][1] * M
->m
[2][3] +
2372 M
->m
[1][3] * M
->m
[2][1] * M
->m
[3][2] -
2373 M
->m
[1][3] * M
->m
[3][1] * M
->m
[2][2];
2376 -M
->m
[0][1] * M
->m
[2][2] * M
->m
[3][3] +
2377 M
->m
[0][1] * M
->m
[3][2] * M
->m
[2][3] +
2378 M
->m
[0][2] * M
->m
[2][1] * M
->m
[3][3] -
2379 M
->m
[0][2] * M
->m
[3][1] * M
->m
[2][3] -
2380 M
->m
[0][3] * M
->m
[2][1] * M
->m
[3][2] +
2381 M
->m
[0][3] * M
->m
[3][1] * M
->m
[2][2];
2384 M
->m
[0][1] * M
->m
[1][2] * M
->m
[3][3] -
2385 M
->m
[0][1] * M
->m
[3][2] * M
->m
[1][3] -
2386 M
->m
[0][2] * M
->m
[1][1] * M
->m
[3][3] +
2387 M
->m
[0][2] * M
->m
[3][1] * M
->m
[1][3] +
2388 M
->m
[0][3] * M
->m
[1][1] * M
->m
[3][2] -
2389 M
->m
[0][3] * M
->m
[3][1] * M
->m
[1][2];
2392 -M
->m
[0][1] * M
->m
[1][2] * M
->m
[2][3] +
2393 M
->m
[0][1] * M
->m
[2][2] * M
->m
[1][3] +
2394 M
->m
[0][2] * M
->m
[1][1] * M
->m
[2][3] -
2395 M
->m
[0][2] * M
->m
[2][1] * M
->m
[1][3] -
2396 M
->m
[0][3] * M
->m
[1][1] * M
->m
[2][2] +
2397 M
->m
[0][3] * M
->m
[2][1] * M
->m
[1][2];
2400 -M
->m
[1][0] * M
->m
[2][2] * M
->m
[3][3] +
2401 M
->m
[1][0] * M
->m
[3][2] * M
->m
[2][3] +
2402 M
->m
[1][2] * M
->m
[2][0] * M
->m
[3][3] -
2403 M
->m
[1][2] * M
->m
[3][0] * M
->m
[2][3] -
2404 M
->m
[1][3] * M
->m
[2][0] * M
->m
[3][2] +
2405 M
->m
[1][3] * M
->m
[3][0] * M
->m
[2][2];
2408 M
->m
[0][0] * M
->m
[2][2] * M
->m
[3][3] -
2409 M
->m
[0][0] * M
->m
[3][2] * M
->m
[2][3] -
2410 M
->m
[0][2] * M
->m
[2][0] * M
->m
[3][3] +
2411 M
->m
[0][2] * M
->m
[3][0] * M
->m
[2][3] +
2412 M
->m
[0][3] * M
->m
[2][0] * M
->m
[3][2] -
2413 M
->m
[0][3] * M
->m
[3][0] * M
->m
[2][2];
2416 -M
->m
[0][0] * M
->m
[1][2] * M
->m
[3][3] +
2417 M
->m
[0][0] * M
->m
[3][2] * M
->m
[1][3] +
2418 M
->m
[0][2] * M
->m
[1][0] * M
->m
[3][3] -
2419 M
->m
[0][2] * M
->m
[3][0] * M
->m
[1][3] -
2420 M
->m
[0][3] * M
->m
[1][0] * M
->m
[3][2] +
2421 M
->m
[0][3] * M
->m
[3][0] * M
->m
[1][2];
2424 M
->m
[0][0] * M
->m
[1][2] * M
->m
[2][3] -
2425 M
->m
[0][0] * M
->m
[2][2] * M
->m
[1][3] -
2426 M
->m
[0][2] * M
->m
[1][0] * M
->m
[2][3] +
2427 M
->m
[0][2] * M
->m
[2][0] * M
->m
[1][3] +
2428 M
->m
[0][3] * M
->m
[1][0] * M
->m
[2][2] -
2429 M
->m
[0][3] * M
->m
[2][0] * M
->m
[1][2];
2432 M
->m
[1][0] * M
->m
[2][1] * M
->m
[3][3] -
2433 M
->m
[1][0] * M
->m
[3][1] * M
->m
[2][3] -
2434 M
->m
[1][1] * M
->m
[2][0] * M
->m
[3][3] +
2435 M
->m
[1][1] * M
->m
[3][0] * M
->m
[2][3] +
2436 M
->m
[1][3] * M
->m
[2][0] * M
->m
[3][1] -
2437 M
->m
[1][3] * M
->m
[3][0] * M
->m
[2][1];
2440 -M
->m
[0][0] * M
->m
[2][1] * M
->m
[3][3] +
2441 M
->m
[0][0] * M
->m
[3][1] * M
->m
[2][3] +
2442 M
->m
[0][1] * M
->m
[2][0] * M
->m
[3][3] -
2443 M
->m
[0][1] * M
->m
[3][0] * M
->m
[2][3] -
2444 M
->m
[0][3] * M
->m
[2][0] * M
->m
[3][1] +
2445 M
->m
[0][3] * M
->m
[3][0] * M
->m
[2][1];
2448 M
->m
[0][0] * M
->m
[1][1] * M
->m
[3][3] -
2449 M
->m
[0][0] * M
->m
[3][1] * M
->m
[1][3] -
2450 M
->m
[0][1] * M
->m
[1][0] * M
->m
[3][3] +
2451 M
->m
[0][1] * M
->m
[3][0] * M
->m
[1][3] +
2452 M
->m
[0][3] * M
->m
[1][0] * M
->m
[3][1] -
2453 M
->m
[0][3] * M
->m
[3][0] * M
->m
[1][1];
2456 -M
->m
[0][0] * M
->m
[1][1] * M
->m
[2][3] +
2457 M
->m
[0][0] * M
->m
[2][1] * M
->m
[1][3] +
2458 M
->m
[0][1] * M
->m
[1][0] * M
->m
[2][3] -
2459 M
->m
[0][1] * M
->m
[2][0] * M
->m
[1][3] -
2460 M
->m
[0][3] * M
->m
[1][0] * M
->m
[2][1] +
2461 M
->m
[0][3] * M
->m
[2][0] * M
->m
[1][1];
2464 -M
->m
[1][0] * M
->m
[2][1] * M
->m
[3][2] +
2465 M
->m
[1][0] * M
->m
[3][1] * M
->m
[2][2] +
2466 M
->m
[1][1] * M
->m
[2][0] * M
->m
[3][2] -
2467 M
->m
[1][1] * M
->m
[3][0] * M
->m
[2][2] -
2468 M
->m
[1][2] * M
->m
[2][0] * M
->m
[3][1] +
2469 M
->m
[1][2] * M
->m
[3][0] * M
->m
[2][1];
2472 M
->m
[0][0] * M
->m
[2][1] * M
->m
[3][2] -
2473 M
->m
[0][0] * M
->m
[3][1] * M
->m
[2][2] -
2474 M
->m
[0][1] * M
->m
[2][0] * M
->m
[3][2] +
2475 M
->m
[0][1] * M
->m
[3][0] * M
->m
[2][2] +
2476 M
->m
[0][2] * M
->m
[2][0] * M
->m
[3][1] -
2477 M
->m
[0][2] * M
->m
[3][0] * M
->m
[2][1];
2480 -M
->m
[0][0] * M
->m
[1][1] * M
->m
[3][2] +
2481 M
->m
[0][0] * M
->m
[3][1] * M
->m
[1][2] +
2482 M
->m
[0][1] * M
->m
[1][0] * M
->m
[3][2] -
2483 M
->m
[0][1] * M
->m
[3][0] * M
->m
[1][2] -
2484 M
->m
[0][2] * M
->m
[1][0] * M
->m
[3][1] +
2485 M
->m
[0][2] * M
->m
[3][0] * M
->m
[1][1];
2488 M
->m
[0][0] * M
->m
[1][1] * M
->m
[2][2] -
2489 M
->m
[0][0] * M
->m
[2][1] * M
->m
[1][2] -
2490 M
->m
[0][1] * M
->m
[1][0] * M
->m
[2][2] +
2491 M
->m
[0][1] * M
->m
[2][0] * M
->m
[1][2] +
2492 M
->m
[0][2] * M
->m
[1][0] * M
->m
[2][1] -
2493 M
->m
[0][2] * M
->m
[2][0] * M
->m
[1][1];
2496 M
->m
[0][0] * D
->m
[0][0] +
2497 M
->m
[1][0] * D
->m
[0][1] +
2498 M
->m
[2][0] * D
->m
[0][2] +
2499 M
->m
[3][0] * D
->m
[0][3];
2501 if (det
< 1e-30) {/* non inversible */
2502 *D
= *M
; /* wine tests */
2508 for (i
= 0; i
< 4; i
++)
2509 for (k
= 0; k
< 4; k
++)
2516 nine_d3d_matrix_matrix_mul(&I
, D
, M
);
2518 for (i
= 0; i
< 4; ++i
)
2519 for (k
= 0; k
< 4; ++k
)
2520 if (fabsf(I
.m
[i
][k
] - (float)(i
== k
)) > 1e-3)
2521 DBG("Matrix inversion check FAILED !\n");