Wednesday, February 27, 2019

Digest for comp.lang.c++@googlegroups.com - 11 updates in 2 topics

"Chris M. Thomasson" <invalid_chris_thomasson_invalid@invalid.com>: Feb 21 01:28PM -0800

On 2/20/2019 2:33 PM, Melzzzzz wrote:
>>> Who would do this in year 2019? Recompile in 64-bit and measure again!
 
>> Just tested with a x64 build, and here are the results wrt version 0.1:
 
>> Testing Version 0.1: Chris M. Thomasson's Experimental Read/Write Mutex
[...]
>> Nice! Humm... Iirc, unsigned long on Windows is only 32-bit?
 
> Yep. And long double is 64 bits if even hardware provides 80 bits...
 
Fwiw, on Intel, I want fetch_add to use LOCK XADD, this is very
important. Let's check out a little test on GodBolt:
 
https://godbolt.org/z/yruhAL // using long
 
https://godbolt.org/z/NKYDeI // using short
 
Wrt my ct_rwmutex, it better use LOCK XADD no matter what damn it.
"Chris M. Thomasson" <invalid_chris_thomasson_invalid@invalid.com>: Feb 21 01:01PM -0800

On 2/20/2019 10:03 PM, Chris M. Thomasson wrote:
>     }
 
>     return cmp;
> }
[...]
 
Notice how I am using compare_exchange_strong? I think, this thought is
before testing, that your algorithm should work fine with
compare_exchange_weak. The weak version does not impose some rather
hefty requirements on the CAS. It is more "friendly" on systems that use
LL/SC to emulate CAS.
"Chris M. Thomasson" <invalid_chris_thomasson_invalid@invalid.com>: Feb 21 03:42PM -0800

On 2/20/2019 10:03 PM, Chris M. Thomasson wrote:
>> This is similar shared_mutex, but just for Win32 only.
>> My shared mutex gives shared readers always priority over
>> writers (don't know what std::shared_mutex does instead).
 
Our algorithms are getting similar reads, but mine and std::shared_mutex
get many more writes. This is due to your explicit writer starvation,
aka reader priority over all else. Also, your looping on CAS can give my
loopless algorithm an advantage wrt more intensive reads for longer
periods of time. The code at the end of this post uses your raw
algorithm posted as-is, I included windows.h and intrin.h.
 
It tests your work against std::shared_mutex, and should compile with
MSVC 2017. Have not tested it on GCC yet. I can port your algorithm to
pure C++11, but then it would have to use event and semaphore created
with mutex/condvar, and not Windows primitives directly.
 
Keep in mind that my work is using 100% pure C++11, and yours is using
windows specific primitives.
 
Afaict, your work is faster than std::shared_mutex on my end, but
consistently suffers from writer starvation. This can be an issue. Well,
fwiw, here is code for a test of your algorithm vs std::shared_mutex, it
should compile for you wrt windows:
 
_________________________________________________
 
/* Crude Read/Write Mutex Benchmark
 
This tests Bonita's algorithm SharedMutex vs. std::shared_mutex.
 
It shows how many reads and writes can be performed within
a fixed amount of time.
 
by: Chris M. Thomasson
 
version 0.1
__________________________________________*/
 
 
 
#include <thread>
#include <atomic>
#include <shared_mutex>
#include <condition_variable>
#include <iostream>
#include <functional>
#include <chrono>
#include <cassert>
#include <cstdlib>
#include <ctime>
#include <climits>
#include <cstdint>
#include <vector>
 
 
// undefine to test std::shared_mutex
#define CT_TEST_FAST_MUTEX 1
 
 
#define THREADS 8 // multiplied by std::hardware_concurrency
#define NODE_PRIME 1000000 // number of nodes
#define CRUDE_CACHE_PAD 256 // crude cache pad
#define TEST_DURATION_SEC 60 // number of seconds for the test
 
 
 
 
 
// Bonita work start
#define WIN32_LEAN_AND_MEAN
 
 
// Bonita Montero Read/Write Mutex
// Interface modified for std::shared_mutex compatibility.
#include <Windows.h>
#include <intrin.h>
#include <cassert>
 
class SharedMutex
{
public:
SharedMutex();
~SharedMutex();
void lock();
void unlock();
void lock_shared();
void unlock_shared();
 
private:
volatile LONG m_lRWCounters; // lower 16 bits: writers including
the one that owns the lock
// bits 16-30: count of threads
having read-access or waiting for reac-access
// sign-bit: readers active
volatile DWORD m_dwOwningThreadId;
volatile DWORD m_dwRecursionCount;
HANDLE m_hEvtReleaseWriter;
HANDLE m_hSemReleaseReaders;
};
 
 
// Initialize the counter word and create the two wakeup primitives:
// an auto-reset event (second CreateEvent arg FALSE) for handing the
// lock to one writer, and a counting semaphore for releasing any
// number of waiting readers at once.
// Fixes: stray double semicolon removed; debug assertions added so a
// failed handle creation is caught instead of surfacing later as a
// wait on a NULL handle.
SharedMutex::SharedMutex()
{
m_lRWCounters = 0;
m_dwOwningThreadId = 0;
m_dwRecursionCount = 0;
m_hEvtReleaseWriter = CreateEvent(NULL, FALSE, FALSE, NULL);
m_hSemReleaseReaders = CreateSemaphore(NULL, 0, 0x7FFFFFFF, NULL);
assert(m_hEvtReleaseWriter != NULL);
assert(m_hSemReleaseReaders != NULL);
}
 
// Tear down the mutex. Destroying it while a writer still owns it is a
// usage error, so the ownership fields must be back at their initial
// state; the two kernel handles are then returned to the system.
SharedMutex::~SharedMutex()
{
assert(m_dwRecursionCount == 0);
assert(m_dwOwningThreadId == 0);

CloseHandle(m_hSemReleaseReaders);
CloseHandle(m_hEvtReleaseWriter);
}
 
// Acquire exclusive (writer) access. Re-entrant: if the calling thread
// already owns the write lock, only the recursion count is bumped.
void SharedMutex::lock()
{
DWORD dwCurrentThreadId;

// Recursive acquisition: m_dwOwningThreadId is only ever written by the
// thread that holds exclusive access, so a plain read suffices here.
if (m_dwOwningThreadId == (dwCurrentThreadId = GetCurrentThreadId()))
{
m_dwRecursionCount++;
return;
}

LONG lRWCounters,
lOldRWCounters;

// CAS loop: increment the writer count (low 16 bits). If the whole
// counter word was zero, the lock was free and we own it outright;
// otherwise fall out of the loop and block on the writer event.
for (lRWCounters = m_lRWCounters; ; lRWCounters = lOldRWCounters)
{
// overflow guard: low 16 bits saturated would mean 0xFFFF writers
assert((lRWCounters & 0x0FFFF) != 0x0FFFF);

if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, lRWCounters + 1,
lRWCounters)) == lRWCounters)
if (lOldRWCounters == 0)
{
m_dwOwningThreadId = dwCurrentThreadId;
return;
}
else
break;

}

// Contended path: wait until unlock()/unlock_shared() signals the
// event to hand the lock to this writer, then record ownership.
WaitForSingleObject(m_hEvtReleaseWriter, INFINITE);
m_dwOwningThreadId = dwCurrentThreadId;
}
 
// Release exclusive (writer) access. A recursive unlock only decrements
// the recursion count; the final unlock hands the lock over, preferring
// any waiting readers over waiting writers (explicit reader priority).
void SharedMutex::unlock()
{
DWORD dwRecursionCount;

assert(m_dwOwningThreadId == GetCurrentThreadId());

// Recursive unlock: counter word untouched, nobody to wake.
if ((dwRecursionCount = m_dwRecursionCount) != 0)
{
m_dwRecursionCount = dwRecursionCount - 1;
return;
}

// Clear ownership before publishing the release via the CAS below.
m_dwOwningThreadId = 0;

LONG lRWCounters,
lOldRWCounters;

// sharers have priority over writers

for (lRWCounters = m_lRWCounters; ; lRWCounters = lOldRWCounters)
{
// we hold the lock, so at least our own writer count must be present
assert(lRWCounters > 0);

// Readers are queued (bits 16-30 nonzero): drop our writer count,
// set the readers-active sign bit, and release them all at once.
if ((lRWCounters & 0x7FFF0000))
{
if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, (lRWCounters - 1) |
(LONG)0x80000000u, lRWCounters)) == lRWCounters)
{
// release one semaphore count per queued reader
ReleaseSemaphore(m_hSemReleaseReaders, (lRWCounters &
0x7FFF0000) >> 16, NULL);
return;
}
}
else
// No readers queued: decrement the writer count; if other writers
// remain (count was > 1), wake exactly one via the auto-reset event.
if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, lRWCounters - 1,
lRWCounters)) == lRWCounters)
{
if ((lOldRWCounters & 0x0FFFF) > 1)
SetEvent(m_hEvtReleaseWriter);
return;
}
}
}
 
void SharedMutex::lock_shared()
{
LONG lRWCounters,
lOldRWCounters;
 
for (lRWCounters = m_lRWCounters; ; lRWCounters = lOldRWCounters)
{
assert((lRWCounters & 0x7FFF0000) != 0x7FFF0000);
 
// even shared access can be recursively, but in this case the
sharer-count will increment
 
if (lRWCounters <= 0)
{
if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, (lRWCounters + 0x10000) |
(LONG)0x80000000u, lRWCounters)) == lRWCounters)
return;
}
else
if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, (lRWCounters + 0x10000),
lRWCounters)) == lRWCounters)
{
WaitForSingleObject(m_hSemReleaseReaders, INFINITE);
return;
}
}
}
 
// Release shared (reader) access. The last reader out clears the
// readers-active sign bit and, if writers are queued, wakes one.
void SharedMutex::unlock_shared()
{
LONG lRWCounters,
lOldRWCounters;

for (lRWCounters = m_lRWCounters; ; lRWCounters = lOldRWCounters)
{
// sign bit must be set (readers active) and reader count nonzero
assert(lRWCounters < 0 && (lRWCounters & 0x7FFF0000) != 0);

// We are the last active reader: drop our count and clear the
// readers-active flag in one CAS, then hand over to a writer if
// any are waiting (low 16 bits nonzero).
if ((lRWCounters & 0x7FFF0000) == 0x10000)
{
if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, (lRWCounters - 0x10000) &
(LONG)0x7FFFFFFF, lRWCounters)) == lRWCounters)
{
if ((lRWCounters & 0x0FFFF) != 0)
SetEvent(m_hEvtReleaseWriter);
return;
}
}
else
// Other readers remain active: just decrement the reader count.
if ((lOldRWCounters =
_InterlockedCompareExchange(&m_lRWCounters, lRWCounters - 0x10000,
lRWCounters)) == lRWCounters)
return;
}
}
// Bonita work end
 
 
 
 
 
// Minimal intrusive singly-linked stack used as the shared data structure
// under test. NOT thread-safe by itself — callers serialize access through
// the read/write mutex being benchmarked. The destructor deletes any nodes
// still on the stack.
struct ct_simple_stack
{
struct node
{
node* volatile m_next; // volatile to suppress optimization of traversals
unsigned int m_tid;    // id of the thread that created the node

// Fix: m_next was left uninitialized. push() always overwrites it,
// but a node that was constructed and never pushed carried an
// indeterminate pointer — walking or deleting through it (as the
// flush()/delete loop in ~ct_simple_stack does) would be UB.
node(unsigned int tid) : m_next(nullptr), m_tid(tid) {}
};

node* m_head; // top of stack; nullptr when empty

ct_simple_stack() : m_head(nullptr) {}

// Drain the stack and delete every remaining node.
~ct_simple_stack()
{
ct_simple_stack::node* n = flush();

while (n)
{
ct_simple_stack::node* next = n->m_next;
delete n;
n = next;
}
}

// Push a caller-allocated node; the stack takes ownership.
void push(node* n)
{
n->m_next = m_head;
m_head = n;
}

// Pop the top node, or return nullptr when empty.
// Ownership transfers back to the caller.
node* pop()
{
node* n = m_head;

if (n)
{
m_head = n->m_next;
}

return n;
}

// Detach and return the whole chain, leaving the stack empty.
node* flush()
{
node* n = m_head;
m_head = nullptr;
return n;
}

};
 
 
struct ct_shared
{
std::atomic<bool> m_run;
 
// protected by m_std_rwmutex
std::uint64_t m_reads;
std::uint64_t m_writes;
 
ct_simple_stack m_stack;
ct_simple_stack m_stack_dtor;
 
#if defined (CT_TEST_FAST_MUTEX)
//ct_rwmutex m_std_rwmutex;
SharedMutex m_std_rwmutex;
#else
std::shared_mutex m_std_rwmutex;

No comments: