std::mutex m;
   std::condition_variable cv;
+  std::unique_lock<std::mutex> l(m);
 
   std::atomic<Tp> a(val1);
   std::thread t([&]
                {
+                 {
+                   // Blocks until the parent thread's cv.wait(l) unlocks m.
+                   std::lock_guard<std::mutex> ll(m);
+                 }
                  cv.notify_one();
                  a.wait(val1);
                  if (a.load() != val2)
                    a = val1;
                });
-  std::unique_lock<std::mutex> l(m);
   cv.wait(l);
   std::this_thread::sleep_for(100ms);
   a.store(val2);
 
   std::mutex m;
   std::condition_variable cv;
+  std::unique_lock<std::mutex> l(m);
 
   std::atomic<Tp> a(val1);
   std::thread t([&]
                {
+                 {
+                   // Blocks until the parent thread's cv.wait(l) unlocks m.
+                   std::lock_guard<std::mutex> ll(m);
+                 }
                  cv.notify_one();
                  a.wait(val1);
                  auto v = a.load();
                  if (__builtin_memcmp(&v, &val2, sizeof(Tp)) != 0)
                    a = val1;
                });
-  std::unique_lock<std::mutex> l(m);
   cv.wait(l);
   std::this_thread::sleep_for(100ms);
   a.store(val2);
 
   std::mutex m;
   std::condition_variable cv;
+  std::unique_lock<std::mutex> l(m);
 
   std::atomic<Tp> a(val1);
   std::thread t([&]
                {
+                 {
+                   // Blocks until the parent thread's cv.wait(l) unlocks m.
+                   std::lock_guard<std::mutex> ll(m);
+                 }
                  cv.notify_one();
                  std::atomic_wait(&a, val1);
                  if (a.load() != val2)
                    a = val1;
                });
-  std::unique_lock<std::mutex> l(m);
   cv.wait(l);
   std::this_thread::sleep_for(100ms);
   a.store(val2);
 
   std::mutex m;
   std::condition_variable cv;
+  std::unique_lock<std::mutex> l(m);
 
   std::atomic<Tp> a(val1);
   std::thread t([&]
                {
+                 {
+                   // Blocks until the parent thread's cv.wait(l) unlocks m.
+                   std::lock_guard<std::mutex> ll(m);
+                 }
                  cv.notify_one();
                  std::atomic_wait(&a, val1);
                  auto v = a.load();
                  if (__builtin_memcmp(&v, &val2, sizeof(Tp)) != 0)
                    a = val1;
                });
-  std::unique_lock<std::mutex> l(m);
   cv.wait(l);
   std::this_thread::sleep_for(100ms);
   a.store(val2);