/**************************************************************************
 *
 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "intel_context.h"
#include "intel_batchbuffer.h"
#include "intel_buffer_objects.h"
#include "intel_decode.h"
#include "intel_reg.h"
#include "intel_bufmgr.h"
#include "intel_buffers.h"

struct cached_batch_item {
   struct cached_batch_item *next;
   uint16_t header;
   uint16_t size;
};

static void clear_cache( struct intel_context *intel )
{
   struct cached_batch_item *item = intel->batch.cached_items;

   while (item) {
      struct cached_batch_item *next = item->next;
      free(item);
      item = next;
   }

   intel->batch.cached_items = NULL;
}

void
intel_batchbuffer_init(struct intel_context *intel)
{
   intel_batchbuffer_reset(intel);

   if (intel->gen == 6) {
      /* We can't just use brw_state_batch to get a chunk of space for
       * the gen6 workaround because it involves actually writing to
       * the buffer, and the kernel doesn't let us write to the batch.
       */
      intel->batch.workaround_bo = drm_intel_bo_alloc(intel->bufmgr,
                                                      "pipe_control workaround",
                                                      4096, 4096);
   }
}

void
intel_batchbuffer_reset(struct intel_context *intel)
{
   if (intel->batch.last_bo != NULL) {
      drm_intel_bo_unreference(intel->batch.last_bo);
      intel->batch.last_bo = NULL;
   }
   intel->batch.last_bo = intel->batch.bo;

   clear_cache(intel);

   intel->batch.bo = drm_intel_bo_alloc(intel->bufmgr, "batchbuffer",
                                        intel->maxBatchSize, 4096);

   intel->batch.reserved_space = BATCH_RESERVED;
   intel->batch.state_batch_offset = intel->batch.bo->size;
   intel->batch.used = 0;
}

void
intel_batchbuffer_save_state(struct intel_context *intel)
{
   intel->batch.saved.used = intel->batch.used;
   intel->batch.saved.reloc_count =
      drm_intel_gem_bo_get_reloc_count(intel->batch.bo);
}

void
intel_batchbuffer_reset_to_saved(struct intel_context *intel)
{
   drm_intel_gem_bo_clear_relocs(intel->batch.bo, intel->batch.saved.reloc_count);

   intel->batch.used = intel->batch.saved.used;

   /* Cached batch state is dead, since we just cleared some unknown part of the
    * batchbuffer.  Assume that the caller resets any other state necessary.
    */
   clear_cache(intel);
}
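
/*
 * A minimal sketch of how the save/restore pair above is typically used:
 * snapshot the batch, speculatively emit state, and roll back if the
 * emission cannot complete in the current batch.  emit_something() is a
 * hypothetical helper named only for illustration, and
 * intel_batchbuffer_flush() is assumed to be the usual wrapper around
 * _intel_batchbuffer_flush() below.
 *
 *    intel_batchbuffer_save_state(intel);
 *    if (!emit_something(intel)) {
 *       intel_batchbuffer_reset_to_saved(intel);
 *       intel_batchbuffer_flush(intel);
 *       ... retry in the fresh batch ...
 *    }
 */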

void
intel_batchbuffer_free(struct intel_context *intel)
{
   drm_intel_bo_unreference(intel->batch.last_bo);
   drm_intel_bo_unreference(intel->batch.bo);
   drm_intel_bo_unreference(intel->batch.workaround_bo);
   clear_cache(intel);
}

/* TODO: Push this whole function into bufmgr.
 */
static int
do_flush_locked(struct intel_context *intel)
{
   struct intel_batchbuffer *batch = &intel->batch;
   int ret = 0;

   ret = drm_intel_bo_subdata(batch->bo, 0, 4*batch->used, batch->map);
   if (ret == 0 && batch->state_batch_offset != batch->bo->size) {
      ret = drm_intel_bo_subdata(batch->bo,
                                 batch->state_batch_offset,
                                 batch->bo->size - batch->state_batch_offset,
                                 (char *)batch->map + batch->state_batch_offset);
   }

   if (!intel->intelScreen->no_hw) {
      int ring;

      if (intel->gen < 6 || !batch->is_blit) {
         ring = I915_EXEC_RENDER;
      } else {
         ring = I915_EXEC_BLT;
      }

      if (ret == 0)
         ret = drm_intel_bo_mrb_exec(batch->bo, 4*batch->used, NULL, 0, 0, ring);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      drm_intel_bo_map(batch->bo, false);
      intel_decode(batch->bo->virtual, batch->used,
                   batch->bo->offset,
                   intel->intelScreen->deviceID, true);
      drm_intel_bo_unmap(batch->bo);

      if (intel->vtbl.debug_batch != NULL)
         intel->vtbl.debug_batch(intel);
   }

   if (ret != 0) {
      fprintf(stderr, "intel_do_flush_locked failed: %s\n", strerror(-ret));
      exit(1);
   }
   intel->vtbl.new_batch(intel);

   return ret;
}

int
_intel_batchbuffer_flush(struct intel_context *intel,
                         const char *file, int line)
{
   int ret;

   /* No batch should be emitted that uses a mapped region, because that would
    * cause the map to be incoherent with GPU rendering done by the
    * batchbuffer.  To ensure that condition, we assert a condition that is
    * stronger but easier to implement: that *no* region is mapped.
    */
   assert(intel->num_mapped_regions == 0);

   if (intel->batch.used == 0)
      return 0;

   if (intel->first_post_swapbuffers_batch == NULL) {
      intel->first_post_swapbuffers_batch = intel->batch.bo;
      drm_intel_bo_reference(intel->first_post_swapbuffers_batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH))
      fprintf(stderr, "%s:%d: Batchbuffer flush with %db used\n", file, line,
              4*intel->batch.used);

   intel->batch.reserved_space = 0;

   /* Mark the end of the buffer. */
   intel_batchbuffer_emit_dword(intel, MI_BATCH_BUFFER_END);
   if (intel->batch.used & 1) {
      /* Round batchbuffer usage to 2 DWORDs. */
      intel_batchbuffer_emit_dword(intel, MI_NOOP);
   }

   if (intel->vtbl.finish_batch)
      intel->vtbl.finish_batch(intel);

   intel_upload_finish(intel);

   /* Check that we didn't just wrap our batchbuffer at a bad time. */
   assert(!intel->no_batch_wrap);

   ret = do_flush_locked(intel);

   if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
      fprintf(stderr, "waiting for idle\n");
      drm_intel_bo_wait_rendering(intel->batch.bo);
   }

   /* Reset the buffer:
    */
   intel_batchbuffer_reset(intel);

   return ret;
}
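
/*
 * Callers are expected to reach this through a wrapper that records the call
 * site for the DEBUG_BATCH message above; a minimal sketch, assuming the
 * wrapper macro lives in intel_batchbuffer.h:
 *
 *    #define intel_batchbuffer_flush(intel) \
 *       _intel_batchbuffer_flush(intel, __FILE__, __LINE__)
 *
 *    ... emit state and primitives ...
 *    intel_batchbuffer_flush(intel);  // submit and start a fresh batch
 */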

/* This is the only way buffers get added to the validate list.
 */
bool
intel_batchbuffer_emit_reloc(struct intel_context *intel,
                             drm_intel_bo *buffer,
                             uint32_t read_domains, uint32_t write_domain,
                             uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc(intel->batch.bo, 4*intel->batch.used,
                                 buffer, delta,
                                 read_domains, write_domain);
   assert(ret == 0);

   /*
    * Using the old buffer offset, write in what the right data would be, in case
    * the buffer doesn't move and we can short-circuit the relocation processing
    * in the kernel
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}

bool
intel_batchbuffer_emit_reloc_fenced(struct intel_context *intel,
                                    drm_intel_bo *buffer,
                                    uint32_t read_domains,
                                    uint32_t write_domain,
                                    uint32_t delta)
{
   int ret;

   ret = drm_intel_bo_emit_reloc_fence(intel->batch.bo, 4*intel->batch.used,
                                       buffer, delta,
                                       read_domains, write_domain);
   assert(ret == 0);

   /*
    * Using the old buffer offset, write in what the right data would
    * be, in case the buffer doesn't move and we can short-circuit the
    * relocation processing in the kernel
    */
   intel_batchbuffer_emit_dword(intel, buffer->offset + delta);

   return true;
}
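
/*
 * A minimal sketch of how relocations are normally emitted: through the
 * OUT_RELOC() batch macro (used later in this file), which calls
 * intel_batchbuffer_emit_reloc() at the current batch position.  dest_bo and
 * the surrounding packet layout are illustrative only.
 *
 *    BEGIN_BATCH(3);
 *    OUT_BATCH(some_command_dword);
 *    OUT_RELOC(dest_bo, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
 *    OUT_BATCH(0);
 *    ADVANCE_BATCH();
 */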

void
intel_batchbuffer_data(struct intel_context *intel,
                       const void *data, GLuint bytes, bool is_blit)
{
   assert((bytes & 3) == 0);
   intel_batchbuffer_require_space(intel, bytes, is_blit);
   __memcpy(intel->batch.map + intel->batch.used, data, bytes);
   intel->batch.used += bytes >> 2;
}
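
/*
 * A minimal usage sketch: copy a prepared block of dwords into the batch in
 * one call rather than a series of OUT_BATCH()es.  The payload below is
 * illustrative only; the byte count must be a multiple of 4.
 *
 *    uint32_t payload[2] = { MI_NOOP, MI_NOOP };
 *    intel_batchbuffer_data(intel, payload, sizeof(payload), false);
 */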

void
intel_batchbuffer_cached_advance(struct intel_context *intel)
{
   struct cached_batch_item **prev = &intel->batch.cached_items, *item;
   uint32_t sz = (intel->batch.used - intel->batch.emit) * sizeof(uint32_t);
   uint32_t *start = intel->batch.map + intel->batch.emit;
   uint16_t op = *start >> 16;

   while (*prev) {
      uint32_t *old;

      item = *prev;
      old = intel->batch.map + item->header;
      if (op == *old >> 16) {
         if (item->size == sz && memcmp(old, start, sz) == 0) {
            if (prev != &intel->batch.cached_items) {
               *prev = item->next;
               item->next = intel->batch.cached_items;
               intel->batch.cached_items = item;
            }
            intel->batch.used = intel->batch.emit;
            return;
         }

         goto emit;
      }
      prev = &item->next;
   }

   item = malloc(sizeof(struct cached_batch_item));
   if (item == NULL)
      return;

   item->next = intel->batch.cached_items;
   intel->batch.cached_items = item;

emit:
   item->size = sz;
   item->header = intel->batch.emit;
}
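
/*
 * A minimal sketch of how the dedup above is exercised: build a state packet
 * with the usual macros, then close it with this function instead of
 * ADVANCE_BATCH().  If an identical packet with the same opcode was already
 * emitted in this batch, the new copy is dropped.  The packet shown is
 * illustrative only.
 *
 *    BEGIN_BATCH(2);
 *    OUT_BATCH(some_state_opcode);
 *    OUT_BATCH(some_state_dword);
 *    intel_batchbuffer_cached_advance(intel);
 */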

/*
 * Restriction [DevSNB, DevIVB]:
 *
 * Prior to changing Depth/Stencil Buffer state (i.e. any combination of
 * 3DSTATE_DEPTH_BUFFER, 3DSTATE_CLEAR_PARAMS, 3DSTATE_STENCIL_BUFFER,
 * 3DSTATE_HIER_DEPTH_BUFFER) SW must first issue a pipelined depth stall
 * (PIPE_CONTROL with Depth Stall bit set), followed by a pipelined depth
 * cache flush (PIPE_CONTROL with Depth Flush Bit set), followed by
 * another pipelined depth stall (PIPE_CONTROL with Depth Stall bit set),
 * unless SW can otherwise guarantee that the pipeline from WM onwards is
 * already flushed (e.g., via a preceding MI_FLUSH).
 */
void
intel_emit_depth_stall_flushes(struct intel_context *intel)
{
   assert(intel->gen >= 6 && intel->gen <= 7);

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_CACHE_FLUSH);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_DEPTH_STALL);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();
}
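
/*
 * A minimal sketch of the intended call site, per the restriction quoted
 * above: emit these flushes immediately before any packet that changes
 * depth/stencil buffer state (the actual packet emission is elided here):
 *
 *    intel_emit_depth_stall_flushes(intel);
 *    ... now emit 3DSTATE_DEPTH_BUFFER / 3DSTATE_STENCIL_BUFFER, etc. ...
 */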

/*
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation != 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
void
intel_emit_post_sync_nonzero_flush(struct intel_context *intel)
{
   if (!intel->batch.need_workaround_flush)
      return;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_CS_STALL |
             PIPE_CONTROL_STALL_AT_SCOREBOARD);
   OUT_BATCH(0); /* address */
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_PIPE_CONTROL);
   OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
   OUT_RELOC(intel->batch.workaround_bo,
             I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, 0);
   OUT_BATCH(0); /* write data */
   ADVANCE_BATCH();

   intel->batch.need_workaround_flush = false;
}

/* Emit a pipelined flush to either flush render and texture cache for
 * reading from a FBO-drawn texture, or flush so that frontbuffer
 * render appears on the screen in DRI1.
 *
 * This is also used for the always_flush_cache driconf debug option.
 */
void
intel_batchbuffer_emit_mi_flush(struct intel_context *intel)
{
   if (intel->gen >= 6) {
      if (intel->batch.is_blit) {
         BEGIN_BATCH_BLT(4);
         OUT_BATCH(MI_FLUSH_DW);
         OUT_BATCH(0);
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      } else {
         if (intel->gen == 6) {
            /* Hardware workaround: SNB B-Spec says:
             *
             * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache
             * Flush Enable =1, a PIPE_CONTROL with any non-zero
             * post-sync-op is required.
             */
            intel_emit_post_sync_nonzero_flush(intel);
         }

         BEGIN_BATCH(4);
         OUT_BATCH(_3DSTATE_PIPE_CONTROL);
         OUT_BATCH(PIPE_CONTROL_INSTRUCTION_FLUSH |
                   PIPE_CONTROL_WRITE_FLUSH |
                   PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                   PIPE_CONTROL_VF_CACHE_INVALIDATE |
                   PIPE_CONTROL_TC_FLUSH |
                   PIPE_CONTROL_NO_WRITE |
                   PIPE_CONTROL_CS_STALL);
         OUT_BATCH(0); /* write address */
         OUT_BATCH(0); /* write data */
         ADVANCE_BATCH();
      }
   } else if (intel->gen >= 4) {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL |
                PIPE_CONTROL_WRITE_FLUSH |
                PIPE_CONTROL_NO_WRITE);
      OUT_BATCH(0); /* write address */
      OUT_BATCH(0); /* write data */
      OUT_BATCH(0); /* write data */