1 /**************************************************************************
3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 #include "main/glheader.h"
29 #include "main/macros.h"
30 #include "main/mtypes.h"
31 #include "main/colormac.h"
33 #include "intel_buffers.h"
34 #include "intel_fbo.h"
35 #include "intel_screen.h"
36 #include "intel_span.h"
37 #include "intel_regions.h"
38 #include "intel_tex.h"
40 #include "swrast/swrast.h"
43 intel_set_span_functions(struct intel_context
*intel
,
44 struct gl_renderbuffer
*rb
);
46 #define SPAN_CACHE_SIZE 4096
49 get_span_cache(struct intel_renderbuffer
*irb
, uint32_t offset
)
51 if (irb
->span_cache
== NULL
) {
52 irb
->span_cache
= _mesa_malloc(SPAN_CACHE_SIZE
);
53 irb
->span_cache_offset
= -1;
56 if ((offset
& ~(SPAN_CACHE_SIZE
- 1)) != irb
->span_cache_offset
) {
57 irb
->span_cache_offset
= offset
& ~(SPAN_CACHE_SIZE
- 1);
58 dri_bo_get_subdata(irb
->region
->buffer
, irb
->span_cache_offset
,
59 SPAN_CACHE_SIZE
, irb
->span_cache
);
64 clear_span_cache(struct intel_renderbuffer
*irb
)
66 irb
->span_cache_offset
= -1;
70 pread_32(struct intel_renderbuffer
*irb
, uint32_t offset
)
72 get_span_cache(irb
, offset
);
74 return *(uint32_t *)(irb
->span_cache
+ (offset
& (SPAN_CACHE_SIZE
- 1)));
78 pread_xrgb8888(struct intel_renderbuffer
*irb
, uint32_t offset
)
80 get_span_cache(irb
, offset
);
82 return *(uint32_t *)(irb
->span_cache
+ (offset
& (SPAN_CACHE_SIZE
- 1))) |
87 pread_16(struct intel_renderbuffer
*irb
, uint32_t offset
)
89 get_span_cache(irb
, offset
);
91 return *(uint16_t *)(irb
->span_cache
+ (offset
& (SPAN_CACHE_SIZE
- 1)));
95 pread_8(struct intel_renderbuffer
*irb
, uint32_t offset
)
97 get_span_cache(irb
, offset
);
99 return *(uint8_t *)(irb
->span_cache
+ (offset
& (SPAN_CACHE_SIZE
- 1)));
103 pwrite_32(struct intel_renderbuffer
*irb
, uint32_t offset
, uint32_t val
)
105 clear_span_cache(irb
);
107 dri_bo_subdata(irb
->region
->buffer
, offset
, 4, &val
);
111 pwrite_xrgb8888(struct intel_renderbuffer
*irb
, uint32_t offset
, uint32_t val
)
113 clear_span_cache(irb
);
115 dri_bo_subdata(irb
->region
->buffer
, offset
, 3, &val
);
119 pwrite_16(struct intel_renderbuffer
*irb
, uint32_t offset
, uint16_t val
)
121 clear_span_cache(irb
);
123 dri_bo_subdata(irb
->region
->buffer
, offset
, 2, &val
);
127 pwrite_8(struct intel_renderbuffer
*irb
, uint32_t offset
, uint8_t val
)
129 clear_span_cache(irb
);
131 dri_bo_subdata(irb
->region
->buffer
, offset
, 1, &val
);
/**
 * Convert a packed Z24S8 value to S8Z24 by rotating it right by 8 bits:
 * the low byte moves to bits 31..24 and the upper 24 bits shift down.
 */
static uint32_t
z24s8_to_s8z24(uint32_t val)
{
   return (val << 24) | (val >> 8);
}
/**
 * Convert a packed S8Z24 value to Z24S8 by rotating it left by 8 bits:
 * the top byte moves to bits 7..0 and the lower 24 bits shift up.
 * This is the inverse of z24s8_to_s8z24().
 */
static uint32_t
s8z24_to_z24s8(uint32_t val)
{
   return (val >> 24) | (val << 8);
}
146 static uint32_t no_tile_swizzle(struct intel_renderbuffer
*irb
,
149 return (y
* irb
->region
->pitch
+ x
) * irb
->region
->cpp
;
153 * Deal with tiled surfaces
156 static uint32_t x_tile_swizzle(struct intel_renderbuffer
*irb
,
161 int x_tile_off
, y_tile_off
;
162 int x_tile_number
, y_tile_number
;
163 int tile_off
, tile_base
;
165 tile_stride
= (irb
->region
->pitch
* irb
->region
->cpp
) << 3;
167 xbyte
= x
* irb
->region
->cpp
;
169 x_tile_off
= xbyte
& 0x1ff;
172 x_tile_number
= xbyte
>> 9;
173 y_tile_number
= y
>> 3;
175 tile_off
= (y_tile_off
<< 9) + x_tile_off
;
177 switch (irb
->region
->bit_6_swizzle
) {
178 case I915_BIT_6_SWIZZLE_NONE
:
180 case I915_BIT_6_SWIZZLE_9
:
181 tile_off
^= ((tile_off
>> 3) & 64);
183 case I915_BIT_6_SWIZZLE_9_10
:
184 tile_off
^= ((tile_off
>> 3) & 64) ^ ((tile_off
>> 4) & 64);
186 case I915_BIT_6_SWIZZLE_9_11
:
187 tile_off
^= ((tile_off
>> 3) & 64) ^ ((tile_off
>> 5) & 64);
189 case I915_BIT_6_SWIZZLE_9_10_11
:
190 tile_off
^= ((tile_off
>> 3) & 64) ^ ((tile_off
>> 4) & 64) ^
191 ((tile_off
>> 5) & 64);
194 fprintf(stderr
, "Unknown tile swizzling mode %d\n",
195 irb
->region
->bit_6_swizzle
);
199 tile_base
= (x_tile_number
<< 12) + y_tile_number
* tile_stride
;
202 printf("(%d,%d) -> %d + %d = %d (pitch = %d, tstride = %d)\n",
203 x
, y
, tile_off
, tile_base
,
204 tile_off
+ tile_base
,
205 irb
->region
->pitch
, tile_stride
);
208 return tile_base
+ tile_off
;
211 static uint32_t y_tile_swizzle(struct intel_renderbuffer
*irb
,
216 int x_tile_off
, y_tile_off
;
217 int x_tile_number
, y_tile_number
;
218 int tile_off
, tile_base
;
220 tile_stride
= (irb
->region
->pitch
* irb
->region
->cpp
) << 5;
222 xbyte
= x
* irb
->region
->cpp
;
224 x_tile_off
= xbyte
& 0x7f;
225 y_tile_off
= y
& 0x1f;
227 x_tile_number
= xbyte
>> 7;
228 y_tile_number
= y
>> 5;
230 tile_off
= ((x_tile_off
& ~0xf) << 5) + (y_tile_off
<< 4) +
233 switch (irb
->region
->bit_6_swizzle
) {
234 case I915_BIT_6_SWIZZLE_NONE
:
236 case I915_BIT_6_SWIZZLE_9
:
237 tile_off
^= ((tile_off
>> 3) & 64);
239 case I915_BIT_6_SWIZZLE_9_10
:
240 tile_off
^= ((tile_off
>> 3) & 64) ^ ((tile_off
>> 4) & 64);
242 case I915_BIT_6_SWIZZLE_9_11
:
243 tile_off
^= ((tile_off
>> 3) & 64) ^ ((tile_off
>> 5) & 64);
245 case I915_BIT_6_SWIZZLE_9_10_11
:
246 tile_off
^= ((tile_off
>> 3) & 64) ^ ((tile_off
>> 4) & 64) ^
247 ((tile_off
>> 5) & 64);
250 fprintf(stderr
, "Unknown tile swizzling mode %d\n",
251 irb
->region
->bit_6_swizzle
);
255 tile_base
= (x_tile_number
<< 12) + y_tile_number
* tile_stride
;
257 return tile_base
+ tile_off
;
261 break intelWriteRGBASpan_ARGB8888
268 struct intel_context *intel = intel_context(ctx); \
269 struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
270 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
271 const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
272 unsigned int num_cliprects; \
273 struct drm_clip_rect *cliprects; \
277 intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
279 /* XXX FBO: this is identical to the macro in spantmp2.h except we get
280 * the cliprect info from the context, not the driDrawable.
281 * Move this into spantmp2.h someday.
283 #define HW_CLIPLOOP() \
285 int _nc = num_cliprects; \
287 int minx = cliprects[_nc].x1 - x_off; \
288 int miny = cliprects[_nc].y1 - y_off; \
289 int maxx = cliprects[_nc].x2 - x_off; \
290 int maxy = cliprects[_nc].y2 - y_off;
296 #define Y_FLIP(_y) ((_y) * yScale + yBias)
298 /* XXX with GEM, these need to tell the kernel */
303 /* Convenience macros to avoid typing the swizzle argument over and over */
304 #define NO_TILE(_X, _Y) no_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
305 #define X_TILE(_X, _Y) x_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
306 #define Y_TILE(_X, _Y) y_tile_swizzle(irb, (_X) + x_off, (_Y) + y_off)
/*
 * Color span/pixel function instantiations.
 *
 * Each section below defines the GL pixel format and type, the per-pixel
 * read/write accessors and a name-mangling tag, then includes
 * intel_spantmp.h.
 * NOTE(review): intel_spantmp.h presumably generates the functions named
 * through INTEL_TAG and #undefs these macros before the next section --
 * confirm against that header.
 */

/* r5g6b5 color span and pixel functions */
#define INTEL_PIXEL_FMT GL_RGB
#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
#define INTEL_READ_VALUE(offset) pread_16(irb, offset)
#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v)
#define INTEL_TAG(x) x##_RGB565
#include "intel_spantmp.h"

/* a4r4g4b4 color span and pixel functions */
#define INTEL_PIXEL_FMT GL_BGRA
#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
#define INTEL_READ_VALUE(offset) pread_16(irb, offset)
#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v)
#define INTEL_TAG(x) x##_ARGB4444
#include "intel_spantmp.h"

/* a1r5g5b5 color span and pixel functions */
#define INTEL_PIXEL_FMT GL_BGRA
#define INTEL_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
#define INTEL_READ_VALUE(offset) pread_16(irb, offset)
#define INTEL_WRITE_VALUE(offset, v) pwrite_16(irb, offset, v)
#define INTEL_TAG(x) x##_ARGB1555
#include "intel_spantmp.h"

/* a8r8g8b8 color span and pixel functions */
#define INTEL_PIXEL_FMT GL_BGRA
#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
#define INTEL_READ_VALUE(offset) pread_32(irb, offset)
#define INTEL_WRITE_VALUE(offset, v) pwrite_32(irb, offset, v)
#define INTEL_TAG(x) x##_ARGB8888
#include "intel_spantmp.h"

/* x8r8g8b8 color span and pixel functions.
 * Same GL format/type as a8r8g8b8, but uses the xrgb8888 accessors
 * instead of the plain 32-bit ones.
 */
#define INTEL_PIXEL_FMT GL_BGRA
#define INTEL_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
#define INTEL_READ_VALUE(offset) pread_xrgb8888(irb, offset)
#define INTEL_WRITE_VALUE(offset, v) pwrite_xrgb8888(irb, offset, v)
#define INTEL_TAG(x) x##_xRGB8888
#include "intel_spantmp.h"
348 #define LOCAL_DEPTH_VARS \
349 struct intel_context *intel = intel_context(ctx); \
350 struct intel_renderbuffer *irb = intel_renderbuffer(rb); \
351 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \
352 const GLint yBias = ctx->DrawBuffer->Name ? 0 : irb->Base.Height - 1;\
353 unsigned int num_cliprects; \
354 struct drm_clip_rect *cliprects; \
356 intel_get_cliprects(intel, &cliprects, &num_cliprects, &x_off, &y_off);
359 #define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS
/*
 * Depth span function instantiations.
 *
 * Each section defines the depth value type, the per-pixel read/write
 * accessors and a name-mangling tag, then includes intel_depthtmp.h.
 * NOTE(review): intel_depthtmp.h presumably generates the functions named
 * through INTEL_TAG and #undefs these macros before the next section --
 * confirm against that header.
 */

/* z16 depthbuffer functions. */
#define INTEL_VALUE_TYPE GLushort
#define INTEL_WRITE_DEPTH(offset, d) pwrite_16(irb, offset, d)
#define INTEL_READ_DEPTH(offset) pread_16(irb, offset)
#define INTEL_TAG(name) name##_z16
#include "intel_depthtmp.h"

/* z24 depthbuffer functions. */
#define INTEL_VALUE_TYPE GLuint
#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, d)
#define INTEL_READ_DEPTH(offset) pread_32(irb, offset)
#define INTEL_TAG(name) name##_z24
#include "intel_depthtmp.h"

/* z24s8 depthbuffer functions.
 * Values are rotated by 8 bits on the way in and out, so callers see Z24S8
 * while the buffer holds S8Z24.
 */
#define INTEL_VALUE_TYPE GLuint
#define INTEL_WRITE_DEPTH(offset, d) pwrite_32(irb, offset, z24s8_to_s8z24(d))
#define INTEL_READ_DEPTH(offset) s8z24_to_z24s8(pread_32(irb, offset))
#define INTEL_TAG(name) name##_z24_s8
#include "intel_depthtmp.h"
384 ** 8-bit stencil function (XXX FBO: This is obsolete)
386 #define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, NO_TILE(_x, _y) + 3, d)
387 #define READ_STENCIL(d, _x, _y) d = pread_8(irb, NO_TILE(_x, _y) + 3);
388 #define TAG(x) intel##x##_z24_s8
389 #include "stenciltmp.h"
392 ** 8-bit x-tile stencil function (XXX FBO: This is obsolete)
394 #define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, X_TILE(_x, _y) + 3, d)
395 #define READ_STENCIL(d, _x, _y) d = pread_8(irb, X_TILE(_x, _y) + 3);
396 #define TAG(x) intel_XTile_##x##_z24_s8
397 #include "stenciltmp.h"
400 ** 8-bit y-tile stencil function (XXX FBO: This is obsolete)
402 #define WRITE_STENCIL(_x, _y, d) pwrite_8(irb, Y_TILE(_x, _y) + 3, d)
403 #define READ_STENCIL(d, _x, _y) d = pread_8(irb, Y_TILE(_x, _y) + 3)
404 #define TAG(x) intel_YTile_##x##_z24_s8
405 #include "stenciltmp.h"
408 intel_renderbuffer_map(struct intel_context
*intel
, struct gl_renderbuffer
*rb
)
410 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
412 if (irb
== NULL
|| irb
->region
== NULL
)
415 intel_set_span_functions(intel
, rb
);
419 intel_renderbuffer_unmap(struct intel_context
*intel
,
420 struct gl_renderbuffer
*rb
)
422 struct intel_renderbuffer
*irb
= intel_renderbuffer(rb
);
424 if (irb
== NULL
|| irb
->region
== NULL
)
427 clear_span_cache(irb
);
434 * Map or unmap all the renderbuffers which we may need during
435 * software rendering.
436 * XXX in the future, we could probably convey extra information to
437 * reduce the number of mappings needed. I.e. if doing a glReadPixels
438 * from the depth buffer, we really only need one mapping.
440 * XXX Rewrite this function someday.
441 * We can probably just loop over all the renderbuffer attachments,
442 * map/unmap all of them, and not worry about the _ColorDrawBuffers
443 * _ColorReadBuffer, _DepthBuffer or _StencilBuffer fields.
446 intel_map_unmap_framebuffer(struct intel_context
*intel
,
447 struct gl_framebuffer
*fb
,
452 /* color draw buffers */
453 for (i
= 0; i
< fb
->_NumColorDrawBuffers
; i
++) {
455 intel_renderbuffer_map(intel
, fb
->_ColorDrawBuffers
[i
]);
457 intel_renderbuffer_unmap(intel
, fb
->_ColorDrawBuffers
[i
]);
460 /* color read buffer */
462 intel_renderbuffer_map(intel
, fb
->_ColorReadBuffer
);
464 intel_renderbuffer_unmap(intel
, fb
->_ColorReadBuffer
);
466 /* check for render to textures */
467 for (i
= 0; i
< BUFFER_COUNT
; i
++) {
468 struct gl_renderbuffer_attachment
*att
=
470 struct gl_texture_object
*tex
= att
->Texture
;
472 /* render to texture */
473 ASSERT(att
->Renderbuffer
);
475 intel_tex_map_images(intel
, intel_texture_object(tex
));
477 intel_tex_unmap_images(intel
, intel_texture_object(tex
));
481 /* depth buffer (Note wrapper!) */
482 if (fb
->_DepthBuffer
) {
484 intel_renderbuffer_map(intel
, fb
->_DepthBuffer
->Wrapped
);
486 intel_renderbuffer_unmap(intel
, fb
->_DepthBuffer
->Wrapped
);
489 /* stencil buffer (Note wrapper!) */
490 if (fb
->_StencilBuffer
) {
492 intel_renderbuffer_map(intel
, fb
->_StencilBuffer
->Wrapped
);
494 intel_renderbuffer_unmap(intel
, fb
->_StencilBuffer
->Wrapped
);
499 * Prepare for software rendering. Map current read/draw framebuffers'
500 * renderbuffes and all currently bound texture objects.
502 * Old note: Moved locking out to get reasonable span performance.
505 intelSpanRenderStart(GLcontext
* ctx
)
507 struct intel_context
*intel
= intel_context(ctx
);
510 intelFlush(&intel
->ctx
);
511 LOCK_HARDWARE(intel
);
513 for (i
= 0; i
< ctx
->Const
.MaxTextureImageUnits
; i
++) {
514 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
) {
515 struct gl_texture_object
*texObj
= ctx
->Texture
.Unit
[i
]._Current
;
516 intel_tex_map_images(intel
, intel_texture_object(texObj
));
520 intel_map_unmap_framebuffer(intel
, ctx
->DrawBuffer
, GL_TRUE
);
521 if (ctx
->ReadBuffer
!= ctx
->DrawBuffer
)
522 intel_map_unmap_framebuffer(intel
, ctx
->ReadBuffer
, GL_TRUE
);
526 * Called when done software rendering. Unmap the buffers we mapped in
527 * the above function.
530 intelSpanRenderFinish(GLcontext
* ctx
)
532 struct intel_context
*intel
= intel_context(ctx
);
537 for (i
= 0; i
< ctx
->Const
.MaxTextureImageUnits
; i
++) {
538 if (ctx
->Texture
.Unit
[i
]._ReallyEnabled
) {
539 struct gl_texture_object
*texObj
= ctx
->Texture
.Unit
[i
]._Current
;
540 intel_tex_unmap_images(intel
, intel_texture_object(texObj
));
544 intel_map_unmap_framebuffer(intel
, ctx
->DrawBuffer
, GL_FALSE
);
545 if (ctx
->ReadBuffer
!= ctx
->DrawBuffer
)
546 intel_map_unmap_framebuffer(intel
, ctx
->ReadBuffer
, GL_FALSE
);
548 UNLOCK_HARDWARE(intel
);
553 intelInitSpanFuncs(GLcontext
* ctx
)
555 struct swrast_device_driver
*swdd
= _swrast_GetDeviceDriverReference(ctx
);
556 swdd
->SpanRenderStart
= intelSpanRenderStart
;
557 swdd
->SpanRenderFinish
= intelSpanRenderFinish
;
562 * Plug in appropriate span read/write functions for the given renderbuffer.
563 * These are used for the software fallbacks.
566 intel_set_span_functions(struct intel_context
*intel
,
567 struct gl_renderbuffer
*rb
)
569 struct intel_renderbuffer
*irb
= (struct intel_renderbuffer
*) rb
;
572 /* If in GEM mode, we need to do the tile address swizzling ourselves,
573 * instead of the fence registers handling it.
576 tiling
= irb
->region
->tiling
;
578 tiling
= I915_TILING_NONE
;
580 switch (irb
->texformat
) {
581 case MESA_FORMAT_RGB565
:
583 case I915_TILING_NONE
:
585 intelInitPointers_RGB565(rb
);
588 intel_XTile_InitPointers_RGB565(rb
);
591 intel_YTile_InitPointers_RGB565(rb
);
595 case MESA_FORMAT_ARGB4444
:
597 case I915_TILING_NONE
:
599 intelInitPointers_ARGB4444(rb
);
602 intel_XTile_InitPointers_ARGB4444(rb
);
605 intel_YTile_InitPointers_ARGB4444(rb
);
609 case MESA_FORMAT_ARGB1555
:
611 case I915_TILING_NONE
:
613 intelInitPointers_ARGB1555(rb
);
616 intel_XTile_InitPointers_ARGB1555(rb
);
619 intel_YTile_InitPointers_ARGB1555(rb
);
623 case MESA_FORMAT_ARGB8888
:
624 if (rb
->AlphaBits
== 0) { /* XXX: Need xRGB8888 Mesa format */
627 case I915_TILING_NONE
:
629 intelInitPointers_xRGB8888(rb
);
632 intel_XTile_InitPointers_xRGB8888(rb
);
635 intel_YTile_InitPointers_xRGB8888(rb
);
641 case I915_TILING_NONE
:
643 intelInitPointers_ARGB8888(rb
);
646 intel_XTile_InitPointers_ARGB8888(rb
);
649 intel_YTile_InitPointers_ARGB8888(rb
);
654 case MESA_FORMAT_Z16
:
656 case I915_TILING_NONE
:
658 intelInitDepthPointers_z16(rb
);
661 intel_XTile_InitDepthPointers_z16(rb
);
664 intel_YTile_InitDepthPointers_z16(rb
);
668 case MESA_FORMAT_S8_Z24
:
669 /* There are a few different ways SW asks us to access the S8Z24 data:
670 * Z24 depth-only depth reads
672 * S8Z24 stencil reads.
674 if (rb
->_ActualFormat
== GL_DEPTH_COMPONENT24
) {
676 case I915_TILING_NONE
:
678 intelInitDepthPointers_z24(rb
);
681 intel_XTile_InitDepthPointers_z24(rb
);
684 intel_YTile_InitDepthPointers_z24(rb
);
687 } else if (rb
->_ActualFormat
== GL_DEPTH24_STENCIL8_EXT
) {
689 case I915_TILING_NONE
:
691 intelInitDepthPointers_z24_s8(rb
);
694 intel_XTile_InitDepthPointers_z24_s8(rb
);
697 intel_YTile_InitDepthPointers_z24_s8(rb
);
700 } else if (rb
->_ActualFormat
== GL_STENCIL_INDEX8_EXT
) {
702 case I915_TILING_NONE
:
704 intelInitStencilPointers_z24_s8(rb
);
707 intel_XTile_InitStencilPointers_z24_s8(rb
);
710 intel_YTile_InitStencilPointers_z24_s8(rb
);
715 "Unexpected ActualFormat in intelSetSpanFunctions");
720 "Unexpected MesaFormat in intelSetSpanFunctions");