Friday, April 14, 2023

Digest for comp.lang.c++@googlegroups.com - 9 updates in 4 topics

Nikki Locke <nikki@trumphurst.com>: Apr 14 10:23PM

Available C++ Libraries FAQ
 
URL: http://www.trumphurst.com/cpplibs/
 
This is a searchable list of libraries and utilities (both free
and commercial) available to C++ programmers.
 
If you know of a library which is not in the list, why not fill
in the form at http://www.trumphurst.com/cpplibs/cppsub.php
 
Maintainer: Nikki Locke - if you wish to contact me, please use the form on the website.
Frederick Virchanza Gotham <cauldwell.thomas@gmail.com>: Apr 14 12:15PM -0700

Since C++11, there has been an implicit conversion from a lambda to a
function pointer so long as the lambda has no captures. If the lambda
has captures, the implicit conversion is disabled. However it's easy to
get a function pointer from a lambda-with-captures if we use global
variables or the heap, something like:
 
// Global slot holding the callable that Func() relays to.
std::function<void()> f;

// Ordinary function with the signature a C-style callback needs; all it
// does is call whatever is currently stored in the global `f`.
void Func()
{
    f();
}
 
// Stand-in for a library routine that only accepts a plain function
// pointer: it calls `pf` once, with no arguments.
void Some_Library_Func(void (*const pf)(void))
{
    (*pf)();
}
 
int main(int argc, char **argv)
{
auto mylambda = [argc](void) -> void
{
cout << "Hello " << argc << "!" << endl;
};
 
f = mylambda;
 
Some_Library_Func(Func);
}
 
It is possible though to procure a normal function pointer from a
lambda-with-captures without making use of global variables or the heap
-- it can all be kept on the stack.
 
To invoke a capture lambda, we need two pieces of data:
Datum A: The address of the lambda object
Datum B: The address of the 'operator()' member function
 
Datum A is a pointer into data memory.
Datum B is a pointer into code memory.
 
The technique described in this post will only work on CPUs where the
program counter can be set to an address in data memory, and therefore
we will use 'void*' for Datum B rather than 'void(*)(void)'. I'm open
to correction here but I think this technique will work on every
implementation of C++ in existence today, even on microcontrollers such
as the Texas Instruments F28069 and the Arduino Atmel sam3x8e.
 
We will define a simple POD struct to hold these two pieces of data:
 
// Plain aggregate pairing the two addresses needed to call a capturing
// lambda.
struct LambdaInfo {
    void *data;  // Datum A: address of the lambda object
    void *code;  // Datum B: address of the lambda's 'operator()'
};
 
Let's write a function that invokes a capture lambda, passing the 'this'
pointer as the first argument to the member function:
 
// Calls the lambda described by *p: the stored code address is treated as
// a function taking a single pointer, and the stored data address is
// passed as that argument (standing in for 'this').
void InvokeLambda(LambdaInfo const *const p)
{
    using Entry = void (*)(void*);

    Entry const entry = (Entry)p->code;

    return entry(p->data);
}
 
And now let's check what this got compiled to on an x86_64 computer:
 
mov rdx,QWORD PTR [rdi]
mov rax,QWORD PTR [rdi+0x8]
mov rdi,rdx
jmp rax
 
What we've got here is four instructions. To see a little more clearly
what's going on here, I'm going to replace the function arguments with
numerical constants:
 
// Same call as the pointer-taking version, but with both addresses replaced
// by recognisable placeholder constants so they are easy to spot in the
// generated machine code. Not meant to be executed.
void InvokeLambda(void)
{
    using Entry = void (*)(void*);

    Entry const entry = (Entry)0x1122334455667788;
    void *const self = (void*)0x99aabbccddeeffee;

    return entry(self);
}
 
gets compiled to:
 
movabs rdi,0x99aabbccddeeffee
movabs rax,0x1122334455667788
jmp rax
 
What we've got here now is three simple instructions. Here's the
assembler alongside the machine code:
 
movabs rdi,0x99aabbccddeeffee 48 bf ee ff ee dd cc bb aa 99
movabs rax,0x1122334455667788 48 b8 88 77 66 55 44 33 22 11
jmp rax ff e0
 
What we have here is 22 bytes worth of CPU instructions, which we can
put into a byte array as follows:
 
// Machine code of the three-instruction thunk with the placeholder
// addresses still in place; each 64-bit immediate is stored
// least-significant byte first.
unsigned char instructions[22u] = {
    0x48, 0xBF,                                      // movabs rdi, imm64
    0xEE, 0xFF, 0xEE, 0xDD, 0xCC, 0xBB, 0xAA, 0x99,  //   0x99aabbccddeeffee
    0x48, 0xB8,                                      // movabs rax, imm64
    0x88, 0x77, 0x66, 0x55, 0x44, 0x33, 0x22, 0x11,  //   0x1122334455667788
    0xFF, 0xE0,                                      // jmp rax
};
 
This 22-byte array can be our thunk. I'll write a class to manage the
thunk:
 
// Holds a 22-byte x86-64 code sequence
//
//     movabs rdi, <address of the lambda object>
//     movabs rax, <address of the lambda's operator()>
//     jmp    rax
//
// whose two 64-bit immediates are filled in at run time. The bytes live
// inside this object and the first immediate is the address of the adapted
// lambda, so the pointer returned by getfuncptr() refers to storage owned
// by this LambdaThunk.
class LambdaThunk {

    char unsigned instructions[22u];

    // Copies the eight bytes of the pointer value `p`, in memory order,
    // into instructions[first] .. instructions[first + 7].
    void StoreAddress(unsigned const first,
                      void const volatile *const p) volatile
    {
        char unsigned const volatile *const bytes =
            (char unsigned const volatile *)&p;

        for (unsigned i = 0u; i != 8u; ++i)
        {
            this->instructions[first + i] = bytes[i];
        }
    }

    // Patches the immediate of 'movabs rdi' (bytes 2..9).
    void SetData(void const volatile *const p) volatile
    {
        this->StoreAddress(2u, p);
    }

    // Patches the immediate of 'movabs rax' (bytes 12..19).
    void SetCode(void const volatile *const p) volatile
    {
        this->StoreAddress(12u, p);
    }

public:

    // Writes only the fixed opcode bytes; both address fields stay unset
    // until AdaptFrom() is called.
    LambdaThunk(void)
    {
        // movabs rdi, imm64
        this->instructions[ 0u] = 0x48u;
        this->instructions[ 1u] = 0xBFu;

        // movabs rax, imm64
        this->instructions[10u] = 0x48u;
        this->instructions[11u] = 0xB8u;

        // jmp rax
        this->instructions[20u] = 0xFFu;
        this->instructions[21u] = 0xE0u;
    }

    // Points the thunk at `arg`: its address becomes the first argument and
    // its operator() becomes the jump target.
    template<typename LambdaType>
    void AdaptFrom(LambdaType &arg) volatile
    {
        // Casting a pointer-to-member-function to 'void*' is not standard
        // C++; it works fine with GNU g++.
        void const volatile *const entry =
            (void*)&LambdaType::operator();

        this->SetData(&arg);
        this->SetCode(entry);
    }

    // Sets the opcodes, then adapts the thunk to `arg`.
    template<typename LambdaType>
    LambdaThunk(LambdaType &arg) : LambdaThunk()
    {
        this->AdaptFrom<LambdaType>(arg);
    }

    // Returns the address of the instruction bytes, typed as a pointer to a
    // function taking no arguments and returning nothing.
    void (*getfuncptr(void) const volatile)(void)
    {
        return (void(*)(void))&this->instructions;
    }
};
 
And now let's write some test code to try it out:
 
#include <iostream> // cout, endl
using std::cout;
using std::endl;
 
// Test stand-in for a library entry point that accepts nothing but a plain
// function pointer; it calls `pf` exactly once.
void Some_Library_Func( void (*const pf)(void) )
{
    (*pf)();
}
 
int main(int argc, char **argv)
{
auto mylambda = [argc](void) -> void
{
std::cout << "Hello " << argc << "!" << std::endl;
};
 
Some_Library_Func( LambdaThunk(mylambda).getfuncptr() );
 
cout << "Last line in Main" << endl;
}
 
This works fine, you can see it up on Godbolt here:
 
https://godbolt.org/z/r84hEsG1G
 
Things get a little more complicated if the lambda has a return value
and several parameters. For example, if the lambda returns a struct
containing 17 ints and 33 doubles, and if the lambda takes 18 parameters,
then the assembler for 'InvokeLambda' is a little more complicated:
 
// Deliberately bulky return type used for the larger InvokeLambda example.
struct ReturnType {
    int a[17];        // seventeen ints
    double b[33];     // thirty-three doubles
    void (*c)(int);   // pointer to a function taking an int
    std::string d;    // a member that is not trivially copyable
};
 
// Larger variant of the placeholder example: forwards eighteen arguments,
// preceded by a hard-coded object address, to a hard-coded code address and
// returns the callee's ReturnType. Both addresses are placeholder constants
// chosen to stand out in the disassembly; not meant to be executed.
ReturnType InvokeLambda(int arg1, double arg2, float arg3,
                        int arg4, double arg5, float arg6,
                        int arg7, double arg8, float arg9,
                        int arg10, double arg11, float arg12,
                        int arg13, double arg14, float arg15,
                        int arg16, double arg17, float arg18)
{
    // Signature of the target: the object address first, then the same
    // eighteen parameters this function receives.
    using Entry = ReturnType (*)(void volatile *,
                                 int, double, float,
                                 int, double, float,
                                 int, double, float,
                                 int, double, float,
                                 int, double, float,
                                 int, double, float);

    Entry const entry = (Entry)0x1122334455667788;
    void volatile *const self = (void volatile *)0x99aabbccddeeffee;

    return entry(self,
                 arg1,  arg2,  arg3,  arg4,  arg5,  arg6,
                 arg7,  arg8,  arg9,  arg10, arg11, arg12,
                 arg13, arg14, arg15, arg16, arg17, arg18);
}
 
 
gets compiled to:
 
push rbx
movss xmm8,DWORD PTR [rsp+0x30]
mov rbx,rdi
sub rsp,0x8
movss DWORD PTR [rsp],xmm8
push QWORD PTR [rsp+0x30]
mov eax,DWORD PTR [rsp+0x30]
push rax
movss xmm8,DWORD PTR [rsp+0x30]
movabs rax,0x1122334455667788
sub rsp,0x8
movss DWORD PTR [rsp],xmm8
push QWORD PTR [rsp+0x30]
push r9
mov r9d,r8d
mov r8d,ecx
mov ecx,edx
mov edx,esi
movabs rsi,0x99aabbccddeeffee
call rax
mov rax,rbx
add rsp,0x30
pop rbx
ret
 
which is 94 bytes worth of instructions instead of 22. Still manageable.
 
I'm not suggesting that a lambda-with-captures should convert implicitly
to a function pointer, because then we'd have the issue of the lifetime
of the thunk.
 
Anyone got any thoughts on this?
David Brown <david.brown@hesbynett.no>: Apr 14 09:58PM +0200

On 14/04/2023 21:15, Frederick Virchanza Gotham wrote:
> has captures, the implicit conversion is disabled. However it's easy to
> get a function pointer from a lambda-with-captures if we use global
> variables or the heap, something like:
 
<snip>
 
 
> Anyone got any thoughts on this?
 
My first thought is "why?". What are you actually trying to achieve
that is so crucial as to be worth this monstrosity? I know (from
previous threads) that you enjoy figuring out this kind of thing, and
fun and curiosity are perfectly good reasons. But I would like to know
if there are any other reasons or use-cases for this.
"Alf P. Steinbach" <alf.p.steinbach@gmail.com>: Apr 14 11:32PM +0200

On 2023-04-14 9:15 PM, Frederick Virchanza Gotham wrote:
 
> f = mylambda;
 
> Some_Library_Func(Func);
> }
That idea works for a more general portable solution.
 
An assembly language solution, as you presented (snipped), can use less
space, which can be important for embedded system programming, but we're
talking a handful of bytes per thunk.
 
So I just put a generous capacity of 97 recursive calls in this code:
 
 
#include <array>
#include <functional>
#include <stdexcept>    // std::length_error
#include <utility>

#include <type_traits>

#include <assert.h>     // assert
#include <stddef.h>     // size_t
#include <fmt/core.h>   // <url: https://github.com/fmtlib/fmt>, for output.
 
namespace my {
    using   std::array,                                     // <array>
            std::function,                                  // <functional>
            std::index_sequence, std::make_index_sequence;  // <utility>

    template< class T > using type_ = T;
    template< class T > using ref_  = T&;
    template< class T > using in_   = ref_<const T>;

    // Gives a `std::function` (and hence a capturing lambda) an ordinary
    // function pointer, without generating machine code at run time.
    //
    // For each signature `Result( Params... )` there is a fixed pool of
    // `capacity` static slots. Slot `i` stores one callable and is served by
    // the plain function `invoke_<i>`, whose address is what `func_ptr()`
    // returns. A Thunked_ object occupies one slot from construction to
    // destruction; instances may be destroyed in any order.
    //
    // The pool is ordinary static data with no synchronisation.
    template< class Result, class... Params >
    class Thunked_
    {
        using Func = auto( Params... ) -> Result;
        using Wrapped_func = function<Func>;

        enum{ capacity = 97 };

        int m_index;    // The slot owned by this instance.

        static inline Wrapped_func  the_wrapped_funcs[capacity];
        static inline bool          the_slots_taken[capacity] = {};

        // The plain function behind slot `i`. Arguments are forwarded, so
        // move-only and rvalue-reference parameter types work and by-value
        // parameters are not copied a second time.
        template< size_t i >
        static auto invoke_( Params... params )
            -> Result
        { return the_wrapped_funcs[i]( std::forward<Params>( params )... ); }

        template< size_t... indices >
        static auto make_invokers( index_sequence<indices...> )
            -> array<Func*, capacity>
        {
            static_assert( sizeof...( indices ) == capacity );
            return { &invoke_<indices>... };
        }

        // the_invokers[i] is the address of invoke_<i>.
        using Indices = make_index_sequence<capacity>;
        static inline array<Func*, capacity> the_invokers =
            make_invokers( Indices() );

        // Returns the lowest-numbered unused slot.
        // Throws std::length_error when all `capacity` slots are occupied.
        static auto free_slot()
            -> int
        {
            for( int i = 0; i < capacity; ++i ) {
                if( !the_slots_taken[i] ) { return i; }
            }
            throw std::length_error( "my::Thunked_: all thunk slots are in use" );
        }

    public:
        // Stores `f` in a free slot.
        // Throws std::length_error if `capacity` instances with this
        // signature already exist.
        Thunked_( Wrapped_func f ):
            m_index( free_slot() )
        {
            the_wrapped_funcs[m_index] = std::move( f );
            // Mark the slot only once the callable is in place.
            the_slots_taken[m_index] = true;
        }

        Thunked_( in_<Thunked_> ) = delete;
        auto operator=( in_<Thunked_> ) -> ref_<Thunked_> = delete;

        // Releases the stored callable (and whatever it captured) and
        // returns the slot to the pool.
        ~Thunked_()
        {
            the_wrapped_funcs[m_index] = nullptr;
            the_slots_taken[m_index] = false;
        }

        // Plain function pointer that calls the stored callable. It must
        // not be called after this object has been destroyed.
        auto func_ptr() const -> Func* { return the_invokers[m_index]; }
        operator Func*() const { return func_ptr(); }
    };
}  // namespace my
 
namespace app {
using std::function; // <functional>
 
function<void()> global_f;
 
// Captureless relay: runs whatever callable is currently stored in the
// namespace-level object `global_f`.
void call_f()
{
    global_f();
}
 
// Stand-in for a library routine that takes a plain function and calls it
// once with no arguments.
void some_library_func( void pf() )
{
    (*pf)();
}
 
void run()
{
const int x = 42;
const auto my_lambda = [&x]() -> void
{
fmt::print( "Hello {:d}!\n", x );
};
 
#ifndef USE_GLOBAL
some_library_func( my::Thunked_<void>( my_lambda ) );
#else
// Original example code using global:
global_f = my_lambda;
some_library_func( &call_f );

No comments: