/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

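/* COPY_DWORDS: copy 'nr' 32-bit words from 'src' to 'dst' and leave 'dst'
 * pointing past the copied data.  On x86 this is a single "rep ; movsl";
 * the portable fallback is a plain dword loop.
 */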
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )                                 \
do {                                                                \
    int __tmp;                                                      \
    __asm__ __volatile__( "rep ; movsl"                             \
                          : "=%c" (__tmp), "=D" (dst), "=S" (__tmp) \
                          : "0" (nr),                               \
                            "D" ((long)dst),                        \
                            "S" ((long)src) );                      \
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )                                 \
do {                                                                \
    int j;                                                          \
    for ( j = 0 ; j < nr ; j++ )                                    \
        dst[j] = ((int *)src)[j];                                   \
    dst += nr;                                                      \
} while (0)
#endif

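/* radeonEmitVec{4,8,12,16}: gather 'count' elements of 1-4 dwords each from
 * a strided source array into a packed DMA buffer.  When the source stride
 * equals the element size the data is already packed and one straight dword
 * copy suffices.
 */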
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 4)
        COPY_DWORDS(out, data, count);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out++;
            data += stride;
        }
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 8)
        COPY_DWORDS(out, data, count * 2);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out += 2;
            data += stride;
        }
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 12) {
        COPY_DWORDS(out, data, count * 3);
    }
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out += 3;
            data += stride;
        }
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
    int i;

    if (RADEON_DEBUG & RADEON_VERTS)
        fprintf(stderr, "%s count %d stride %d out %p data %p\n",
                __FUNCTION__, count, stride, (void *)out, (void *)data);

    if (stride == 16)
        COPY_DWORDS(out, data, count * 4);
    else
        for (i = 0; i < count; i++) {
            out[0] = *(int *)data;
            out[1] = *(int *)(data + 4);
            out[2] = *(int *)(data + 8);
            out[3] = *(int *)(data + 12);
            out += 4;
            data += stride;
        }
}

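/* rcommon_emit_vector: upload one vertex array into a freshly allocated DMA
 * region and describe it in 'aos' (buffer object, offset, component count,
 * stride).  A stride of 0 uploads a single element, for constant attributes.
 *
 * A minimal usage sketch (hypothetical: a 3-component float position array;
 * 'ptr', 'stride', 'count' and 'nr' stand for the caller's array state):
 *
 *   rcommon_emit_vector(ctx, &rmesa->tcl.aos[nr++], ptr, 3, stride, count);
 */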
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
                         const GLvoid * data, int size, int stride, int count)
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    uint32_t *out;

    if (stride == 0) {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
        count = 1;
        aos->stride = 0;
    } else {
        radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
        aos->stride = size;
    }

    aos->components = size;
    aos->count = count;

    radeon_bo_map(aos->bo, 1);
    out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
    switch (size) {
    case 1: radeonEmitVec4(out, data, stride, count); break;
    case 2: radeonEmitVec8(out, data, stride, count); break;
    case 3: radeonEmitVec12(out, data, stride, count); break;
    case 4: radeonEmitVec16(out, data, stride, count); break;
    default:
        assert(0);
        break;
    }
    radeon_bo_unmap(aos->bo);
}

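/* The DMA machinery keeps three lists of buffer objects:
 *   reserved - buffers currently being filled with vertex data,
 *   wait     - buffers handed to the GPU, not yet safe to reuse,
 *   free     - idle buffers ready for reuse.
 * Buffers migrate reserved -> wait -> free in radeonReleaseDmaRegions()
 * and are eventually released once they sit unused on the free list.
 */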
void radeon_init_dma(radeonContextPtr rmesa)
{
    make_empty_list(&rmesa->dma.free);
    make_empty_list(&rmesa->dma.wait);
    make_empty_list(&rmesa->dma.reserved);
    rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

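/* Make a DMA buffer of at least 'size' bytes current: reuse the last buffer
 * on the free list when it is large enough, otherwise allocate a new BO in
 * the GTT domain, flushing the command buffer and retrying if the
 * allocation fails.
 */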
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
    struct radeon_dma_bo *dma_bo = NULL;

    /* Raise the minimum buffer size to at least the requested size,
       rounded up to the next multiple of 16 bytes. */
    if (size > rmesa->dma.minimum_size)
        rmesa->dma.minimum_size = (size + 15) & (~15);

    radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
                 __FUNCTION__, size, (size_t)rmesa->dma.minimum_size);

    if (is_empty_list(&rmesa->dma.free)
        || last_elem(&rmesa->dma.free)->bo->size < size) {
        dma_bo = CALLOC_STRUCT(radeon_dma_bo);
        assert(dma_bo);

again_alloc:
        dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
                                    0, rmesa->dma.minimum_size, 4,
                                    RADEON_GEM_DOMAIN_GTT, 0);

        if (!dma_bo->bo) {
            /* Out of GTT memory: flush queued commands and retry. */
            rcommonFlushCmdBuf(rmesa, __FUNCTION__);
            goto again_alloc;
        }
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    } else {
        /* We push and pop buffers at the end of the list, so that a count
           of unused buffers can be kept for freeing them later from the
           beginning of the list. */
        dma_bo = last_elem(&rmesa->dma.free);
        remove_from_list(dma_bo);
        insert_at_head(&rmesa->dma.reserved, dma_bo);
    }

    rmesa->dma.current_used = 0;
    rmesa->dma.current_vertexptr = 0;

    if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
                                      first_elem(&rmesa->dma.reserved)->bo,
                                      RADEON_GEM_DOMAIN_GTT, 0))
        fprintf(stderr, "failure to revalidate BOs - badness\n");

    if (is_empty_list(&rmesa->dma.reserved)) {
        /* The command buffer was flushed in radeon_revalidate_bos, which
           dropped the reserved buffer; allocate a fresh one. */
        goto again_alloc;
    }
}

/* Allocates a region from the current DMA buffer (the first buffer on the
 * reserved list).  If there isn't enough space left, grab a new buffer
 * (and discard what was left of the current one).  'alignment' must be a
 * power of two.
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
                          struct radeon_bo **pbo, int *poffset,
                          int bytes, int alignment)
{
    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

    if (rmesa->dma.flush)
        rmesa->dma.flush(rmesa->glCtx);

    assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

    alignment--;
    rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
        radeonRefillCurrentDmaRegion(rmesa, bytes);

    *poffset = rmesa->dma.current_used;
    *pbo = first_elem(&rmesa->dma.reserved)->bo;
    radeon_bo_ref(*pbo);

    /* Always align to at least 16 bytes */
    rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;

    assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

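/* Unreference and free every buffer on all three DMA lists; used when the
 * buffers are no longer needed at all (typically at context teardown).
 */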
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;

    if (RADEON_DEBUG & RADEON_DMA)
        fprintf(stderr, "%s\n", __FUNCTION__);

    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }

    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

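/* Give back the unused tail of the current region, e.g. when less data was
 * emitted than the worst-case amount that was allocated for it.
 */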
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
    if (is_empty_list(&rmesa->dma.reserved))
        return;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);

    rmesa->dma.current_used -= return_bytes;
    rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

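/* A buffer is idle when the kernel reports it as not busy.  Kernels or
 * libdrm builds without the busy query return -EINVAL; those are treated
 * as idle, which the warning below flags as a possible small performance
 * cost.
 */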
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
    uint32_t domain;
    int ret = radeon_bo_is_busy(bo, &domain);
    if (ret == -EINVAL) {
        WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
                  "This may cause a small performance drop.\n");
    }
    return ret != -EBUSY;
}

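/* Age the DMA buffer lists: reserved buffers move to the wait list, wait
 * buffers that the GPU is done with move to the free list, and free
 * buffers that have sat unused for DMA_BO_FREE_TIME generations are
 * released.  Buffers too small for the current minimum_size are dropped
 * along the way.
 */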
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
    struct radeon_dma_bo *dma_bo;
    struct radeon_dma_bo *temp;
    const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
    const int time = rmesa->dma.free.expire_counter;

    if (RADEON_DEBUG & RADEON_DMA) {
        size_t free = 0,
               wait = 0,
               reserved = 0;
        foreach(dma_bo, &rmesa->dma.free)
            ++free;

        foreach(dma_bo, &rmesa->dma.wait)
            ++wait;

        foreach(dma_bo, &rmesa->dma.reserved)
            ++reserved;

        fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
                __FUNCTION__, free, wait, reserved,
                (size_t)rmesa->dma.minimum_size);
    }

    if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
        /* Request updated cs processing information from the kernel. */
        legacy_track_pending(rmesa->radeonScreen->bom, 0);
    }

    /* Move waiting bos to the free list; the wait list gives the GPU time
       to finish with the data before the buffer is reused. */
    foreach_s(dma_bo, temp, &rmesa->dma.wait) {
        if (dma_bo->expire_counter == time) {
            WARN_ONCE("Leaking dma buffer object!\n");
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        /* Free objects that are too small to be used, because a larger
           request has raised the minimum size. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        if (!radeon_bo_is_idle(dma_bo->bo))
            continue;
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.free, dma_bo);
    }

    /* Move reserved bos to the wait list. */
    foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
        /* Free objects that are too small to be used, because a larger
           request has raised the minimum size. */
        if (dma_bo->bo->size < rmesa->dma.minimum_size) {
            radeon_bo_unref(dma_bo->bo);
            remove_from_list(dma_bo);
            FREE(dma_bo);
            continue;
        }
        remove_from_list(dma_bo);
        dma_bo->expire_counter = expire_at;
        insert_at_tail(&rmesa->dma.wait, dma_bo);
    }

    /* Free bos that have been unused for some time. */
    foreach_s(dma_bo, temp, &rmesa->dma.free) {
        if (dma_bo->expire_counter != time)
            break;
        remove_from_list(dma_bo);
        radeon_bo_unref(dma_bo->bo);
        FREE(dma_bo);
    }
}

/* Flush the vertices accumulated in the current dma region: hand the
 * pending software-TCL primitive to the hardware and release our mapping
 * and reference on the vertex buffer.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
    radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
    struct radeon_dma *dma = &rmesa->dma;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    dma->flush = NULL;

    radeon_bo_unmap(rmesa->swtcl.bo);

    if (!is_empty_list(&dma->reserved)) {
        GLuint current_offset = dma->current_used;

        assert(dma->current_used +
               rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
               dma->current_vertexptr);

        if (dma->current_used != dma->current_vertexptr) {
            dma->current_used = dma->current_vertexptr;

            rmesa->vtbl.swtcl_flush(ctx, current_offset);
        }
        rmesa->swtcl.numverts = 0;
    }
    radeon_bo_unref(rmesa->swtcl.bo);
    rmesa->swtcl.bo = NULL;
}

/* Alloc space for 'nverts' vertices of 'vsize' bytes each in the current
 * dma region.  Returns NULL after refilling the region when the request
 * did not fit; the caller is expected to retry.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
    GLuint bytes = vsize * nverts;
    void *head;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (is_empty_list(&rmesa->dma.reserved)
        || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
        if (rmesa->dma.flush) {
            rmesa->dma.flush(rmesa->glCtx);
        }

        radeonRefillCurrentDmaRegion(rmesa, bytes);

        return NULL;
    }

    if (!rmesa->dma.flush) {
        /* A command buffer flush restarted the DMA region; re-arm the
           flush callback and note that vertices are pending. */
        rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
        rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
    }

    ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
    ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
    ASSERT( rmesa->dma.current_used +
            rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
            rmesa->dma.current_vertexptr );

    if (!rmesa->swtcl.bo) {
        rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
        radeon_bo_ref(rmesa->swtcl.bo);
        radeon_bo_map(rmesa->swtcl.bo, 1);
    }

    head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
    rmesa->dma.current_vertexptr += bytes;
    rmesa->swtcl.numverts += nverts;
    return head;
}
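/* A minimal caller sketch (hypothetical; emit_three_verts() stands in for
 * the driver's real vertex-writing code):
 *
 *   float *v;
 *   do {
 *       v = (float *) rcommonAllocDmaLowVerts(rmesa, 3,
 *                                             rmesa->swtcl.vertex_size * 4);
 *   } while (!v);   // NULL means the region was refilled; retry
 *   emit_three_verts(v);
 *
 * Termination holds because the refill raises minimum_size to at least the
 * requested byte count, so the retried allocation fits.
 */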

/* Release the vertex-array buffer references held in tcl.aos[], flushing
 * any pending vertices first.  'newinputs' is unused here.
 */
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
    radeonContextPtr radeon = RADEON_CONTEXT( ctx );
    int i;

    if (RADEON_DEBUG & RADEON_IOCTL)
        fprintf(stderr, "%s\n", __FUNCTION__);

    if (radeon->dma.flush) {
        radeon->dma.flush(radeon->glCtx);
    }
    for (i = 0; i < radeon->tcl.aos_count; i++) {
        if (radeon->tcl.aos[i].bo) {
            radeon_bo_unref(radeon->tcl.aos[i].bo);
            radeon->tcl.aos[i].bo = NULL;
        }
    }
}