soft and program: Digest for comp.lang.c++@googlegroups.com

comp.lang.c++@googlegroups.com

Google Groups

Tricky ... - 7 Updates
How to get mantissa of long double? - 7 Updates
Most efficient prefetching distance - 2 Updates

RadicalRabbit@theburrow.co.uk: Oct 03 08:25AM

On Fri, 1 Oct 2021 13:11:21 -0700

>Yeah, they could. Humm... I am working on another animation using a more
>robust coloring algorithm for the field lines. Here is an example:

>https://fractalforums.org/gallery/1612-300921002353.png

That looks much more sophisticated.

"Chris M. Thomasson" <chris.m.thomasson.1@gmail.com>: Oct 03 01:21PM -0700

>> robust coloring algorithm for the field lines. Here is an example:

>> https://fractalforums.org/gallery/1612-300921002353.png

> That looks much more sophisticated.

It does! Thanks. Fwiw, here is another interesting zoom on a spiral
formation using the experimental coloring algorithm:

https://fractalforums.org/gallery/1612-031021065422.png

Now, when I am finished with my new animation, I am going to cycle the
colors using an interpolation across the frames. It should look really
strange, and mesmerizing. That's the goal. ;^)

"Chris M. Thomasson" <chris.m.thomasson.1@gmail.com>: Oct 03 01:33PM -0700

On 10/1/2021 1:54 PM, Branimir Maksimovic wrote:
>>> Learn More

>> That's odd. The links I posted work for me. Just checked them. Humm...
> Safari, iCloud private relay.... ipv6

Humm... Can you see any of my work on YouTube? Here is my channel:

https://www.youtube.com/channel/UC_DhsJu-AbQ6Msnxdf8z6Kg

Does that link work for you?

Branimir Maksimovic <branimir.maksimovic@icloud.com>: Oct 03 09:06PM

> Now, when I am finished with my new animation, I am going to cycle the
> colors using an interpolation across the frames. It should look really
> strange, and mesmerizing. That's the goal. ;^)
What about this (not my work):
https://www.icloud.com/iclouddrive/0sLxM6sxtszTkGEX_3YwH57bA#mandelbrot
proggy:
; Output format: a 64-bit ELF executable produced directly by the assembler
; (presumably the trailing 3 selects the Linux OS ABI -- confirm in the FASM manual).
format ELF64 executable 3
entry start ; execution begins at the 'start' label
segment readable executable ; everything up to the next 'segment' is code
;==============================================================================
; Markers bracketing the region to be analysed (by Intel IACA, judging by the
; names). Both bodies are commented out, so the macros currently expand to nothing.
macro IACA_START {
; mov ebx,111
; db 0x64,0x67,0x90
}
macro IACA_END {
; mov ebx,222
; db 0x64,0x67,0x90
}
;==============================================================================
align 16
compute_color:
; Mandelbrot kernel for 8 points at once (one point per 32-bit float lane).
; Runs a fixed 256 iterations of z = z^2 + c and counts, per lane, the
; iterations in which |z|^2 was not below 4; that count drives the shade.
; in: <ymm0>, <ymm1> normalized x, y coordinates
; out: <ymm0>, <ymm1>, <ymm2> r, g, b color components (all three equal)
; clobbers: eax, ymm3-ymm8
;------------------------------------------------------------------------------
vaddps ymm0,ymm0,[.c_xoffset] ; a = x - 0.5 (shifts the view horizontally)
vxorps ymm2,ymm2,ymm2 ; ymm2 = x
vmovaps ymm3,ymm2 ; ymm3 = y
vmovaps ymm4,ymm2 ; ymm4 = x^2
vmovaps ymm5,ymm2 ; ymm5 = y^2
vxorps ymm8,ymm8,ymm8 ; ymm8 = per-lane count of 'outside' iterations
mov eax,256 ; fixed iteration count
align 32
.loop:
IACA_START ; 12.00 cycles on HSW
vmulps ymm3,ymm2,ymm3 ; ymm3 = x*y
vsubps ymm2,ymm4,ymm5 ; ymm2 = x^2 - y^2
vaddps ymm3,ymm3,ymm3 ; ymm3 = 2*x*y
vaddps ymm2,ymm2,ymm0 ; ymm2 = xn = x^2 - y^2 + a
vaddps ymm3,ymm3,ymm1 ; ymm3 = yn = 2*x*y + b
vmulps ymm4,ymm2,ymm2 ; ymm4 = xn^2
vmulps ymm5,ymm3,ymm3 ; ymm5 = yn^2
; The magnitude test needs the squares (ymm4/ymm5), not xn/yn themselves.
vaddps ymm6,ymm4,ymm5 ; ymm6 = xn^2 + yn^2 = m2
vcmpltps ymm7,ymm6,[.c_4_0] ; all-ones where m2 < 4 (false for NaN)
vandnps ymm7,ymm7,[.c_1_0] ; 1.0 where m2 is not < 4, else 0.0
vaddps ymm8,ymm8,ymm7 ; accumulate the count
sub eax,1
jnz .loop
IACA_END
vmovaps ymm0,ymm8
vdivps ymm0,ymm0,[.c_255_0] ; scale the count (0..256) by 1/255
vmulps ymm0,ymm0,ymm0 ; three successive squarings:
vmulps ymm0,ymm0,ymm0 ; shade = (count/255)^8
vmulps ymm0,ymm0,ymm0
vmovaps ymm1,ymm0 ; g = r
vmovaps ymm2,ymm0 ; b = r
ret
align 32
.c_4_0: dd 8 dup 4.0 ; bail-out threshold for m2
.c_1_0: dd 8 dup 1.0
.c_255_0: dd 8 dup 255.0
.c_xoffset: dd 8 dup -0.5
;==============================================================================
align 16
thread:
; Worker body: repeatedly claims the next tile index from the shared
; [image_tile] counter and renders that TILE_SIZE x TILE_SIZE tile into the
; image buffer, 8 pixels per inner iteration, until no tiles are left.
; Saves/restores rbp and rbx; overwrites rax, rdx, edi, r13-r15 and ymm0-ymm8.
;------------------------------------------------------------------------------
; Two dword locals overlaid on the scratch area at the (aligned) stack pointer.
virtual at rsp
.startx dd ?
.starty dd ?
end virtual
push rbp rbx
mov rbp,rsp ; remember the stack pointer for the epilogue
sub rsp,32 ; reserve scratch space for the locals ...
and rsp,-32 ; ... and round rsp down to a 32-byte boundary
.tile:
mov r15d,1
lock xadd [image_tile],r15d ; r15d = old counter value, counter += 1 (atomic)
cmp r15d,TILE_COUNT
jge .finish ; every tile has already been handed out
xor edx,edx
mov eax,r15d
mov edi,TILE_X_COUNT
div edi ; eax = tile row, edx = tile column
imul eax,TILE_SIZE ; eax = first pixel row of the tile
imul edx,TILE_SIZE ; edx = first pixel column of the tile
mov [.startx],edx
mov [.starty],eax
imul eax,IMAGE_SIZE
add eax,edx ; pixel index = starty*IMAGE_SIZE + startx
shl eax,2 ; 4 bytes per pixel
mov rbx,[image_ptr]
add rbx,rax ; rbx = address of the tile's current row
xor r14d,r14d ; r14d = row within the tile
.row:
xor r13d,r13d ; r13d = column within the tile
align 16
.pixel:
mov edx,[.startx]
mov eax,[.starty]
add edx,r13d ; edx = image x of the first of 8 pixels
add eax,r14d ; eax = image y
;------------------------------------ compute color
vmovaps ymm2,[image_size] ; ymm2 = image size in every lane
vmovaps ymm3,ymm2
vrcpps ymm3,ymm3 ; ymm3 = approximate 1/size
vcvtsi2ss xmm0,xmm0,edx
vcvtsi2ss xmm1,xmm1,eax
vbroadcastss ymm0,xmm0 ; ymm0 = x in all lanes
vbroadcastss ymm1,xmm1 ; ymm1 = y in all lanes
vaddps ymm0,ymm0,[.c_01234567f] ; ymm0 = x+0 .. x+7
vaddps ymm1,ymm1,ymm1 ; 2*y
vaddps ymm0,ymm0,ymm0 ; 2*x
vsubps ymm0,ymm0,ymm2 ; 2*x - size
vsubps ymm1,ymm1,ymm2 ; 2*y - size
vmulps ymm0,ymm0,ymm3 ; (2*x - size)/size
vmulps ymm1,ymm1,ymm3 ; (2*y - size)/size
call compute_color
;------------------------------------ convert color from RGB32F to BGRA8
vxorps ymm3,ymm3,ymm3
vmovaps ymm4,[.c_1_0]
vminps ymm0,ymm0,ymm4 ; clamp each channel to at most 1.0 ...
vminps ymm1,ymm1,ymm4
vminps ymm2,ymm2,ymm4
vmaxps ymm0,ymm0,ymm3 ; ... and at least 0.0
vmaxps ymm1,ymm1,ymm3
vmaxps ymm2,ymm2,ymm3
vmovaps ymm3,[.c_255_0]
vmulps ymm0,ymm0,ymm3 ; scale to 0..255
vmulps ymm1,ymm1,ymm3
vmulps ymm2,ymm2,ymm3
vcvttps2dq ymm0,ymm0 ; truncate to 32-bit integers
vcvttps2dq ymm1,ymm1
vcvttps2dq ymm2,ymm2
vpslld ymm1,ymm1,8 ; g -> bits 8..15
vpslld ymm0,ymm0,16 ; r -> bits 16..23
vpor ymm0,ymm0,[.c_ff000000] ; top byte forced to 0xff
vpor ymm1,ymm1,ymm2 ; b stays in bits 0..7
vpor ymm0,ymm0,ymm1
vmovdqa [rbx+r13*4],ymm0 ; aligned store of 8 packed pixels
;------------------------------------
add r13d,8
cmp r13d,TILE_SIZE
jne .pixel
add rbx,IMAGE_SIZE*4 ; advance to the next image row
add r14d,1
cmp r14d,TILE_SIZE
jne .row
jmp .tile ; tile finished, try to claim another one
.finish:
mov rsp,rbp
pop rbx rbp
ret
align 32
.c_01234567f: dd 0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0 ; per-lane x offsets
.c_1_0: dd 8 dup 1.0
.c_255_0: dd 8 dup 255.0
.c_ff000000: dd 8 dup 0xff000000
;==============================================================================
align 16
mem_alloc:
; Maps a fresh anonymous, private, read/write memory region.
; in: <rdi> size in bytes
; out: <rax> pointer to the allocated memory (raw sys_mmap result, unchecked)
;------------------------------------------------------------------------------
; Arguments are loaded in syscall-argument order; the length has to be copied
; out of rdi before rdi is reused for the address hint.
mov rsi,rdi ; arg 2: length
xor edi,edi ; arg 1: addr = 0, placement left to the kernel
mov edx,0x1+0x2 ; arg 3: PROT_READ | PROT_WRITE
mov r10d,0x02+0x20 ; arg 4: MAP_PRIVATE | MAP_ANONYMOUS
mov r8,-1 ; arg 5: fd
xor r9d,r9d ; arg 6: offset
mov eax,9 ; sys_mmap
syscall
ret
;==============================================================================
align 16
thread_create:
; Starts one worker that runs 'thread' on its own freshly mapped 4096-byte
; stack. The caller returns right after the clone; the new thread renders,
; then stores 1 to the mutex dword, wakes one waiter on it and exits.
; in: <rbx> mutex address
; NOTE(review): neither the mem_alloc result nor a clone failure is checked;
; any non-zero clone result simply returns to the caller.
;------------------------------------------------------------------------------
mov edi,4096
call mem_alloc
mov rsi,rax
add rsi,4096 ; stack pointer for the child = end of the new mapping
mov eax,56 ; sys_clone
; flags, per the Linux headers: CLONE_VM|CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD
mov edi,0x100+0x200+0x400+0x800+0x10000
xor edx,edx
syscall
test eax,eax
jnz .ret ; non-zero result: we are the caller, not the new thread
call thread ; result 0: we are the new thread, do the rendering
mov dword [rbx],1 ; mark this worker as finished
mov eax,202 ; sys_futex
mov rdi,rbx ; mutex address
mov esi,1 ; FUTEX_WAKE
mov edx,1 ; wake 1 thread
syscall
mov eax,60 ; sys_exit
xor edi,edi ; exit code
syscall
.ret:
ret
;==============================================================================
align 16
thread_wait:
; Blocks until the worker owning this mutex has marked itself finished,
; i.e. until the dword at the mutex address is no longer 0.
; in: <rbx> mutex address
; clobbers: rax, rdi, rsi, rdx, r10 (plus whatever the syscall itself clobbers)
;------------------------------------------------------------------------------
.wait:
cmp dword [rbx],0 ; still the 'running' value?
jne .done ; no: the worker has finished
mov eax,202 ; sys_futex
mov rdi,rbx ; mutex address
mov esi,0 ; FUTEX_WAIT
mov edx,0 ; mutex 'running' value
xor r10d,r10d ; timeout pointer = 0 (wait without a timeout)
syscall
; FUTEX_WAIT may return before the value changed (signal, spurious wake-up),
; so always go back and re-check the flag instead of trusting a single wait.
jmp .wait
.done:
ret
;==============================================================================
align 16
image_save:
; Writes the rendered image to 'mandelbrot.tga': an 18-byte TGA header followed
; by the raw pixel buffer at [image_ptr]. If the file cannot be created nothing
; is written. Saves/restores rbx.
;------------------------------------------------------------------------------
push rbx
mov eax,85 ; sys_creat
mov rdi,.tga_name
mov esi,110000000b ; mode 0600 (owner read/write)
syscall
test rax,rax
js .done ; negative result = error code: no file to write to
mov rbx,rax ; rbx = file descriptor
mov eax,1 ; sys_write: header
mov rdi,rbx
mov rsi,.tga_head
mov edx,18
syscall
mov eax,1 ; sys_write: pixel data
mov rdi,rbx
mov rsi,[image_ptr]
mov edx,IMAGE_SIZE*IMAGE_SIZE*4
syscall
mov eax,3 ; sys_close: release the descriptor
mov rdi,rbx
syscall
.done:
pop rbx
ret
.tga_name db 'mandelbrot.tga',0
; Header: 3 bytes (0,0,2) + 9 zero bytes, then IMAGE_SIZE twice as 16-bit
; little-endian values, then the bytes 32 and 0 -- 18 bytes in total.
.tga_head db 0,0,2,9 dup 0
db (IMAGE_SIZE and 0x00ff),(IMAGE_SIZE and 0xff00) shr 8
db (IMAGE_SIZE and 0x00ff),(IMAGE_SIZE and 0xff00) shr 8,32,0
;==============================================================================
align 16
main:
; Program body: allocates the image buffer, starts four workers, waits for
; each of them in turn, then saves the image.
;------------------------------------------------------------------------------
mov rdi,IMAGE_SIZE*IMAGE_SIZE*4 ; 4 bytes per pixel
call mem_alloc
mov [image_ptr],rax
; Start all four workers first so that they run concurrently ...
mov rbx,thread0_mutex
call thread_create
mov rbx,thread1_mutex
call thread_create
mov rbx,thread2_mutex
call thread_create
mov rbx,thread3_mutex
call thread_create
; ... and only then wait for each one's mutex.
mov rbx,thread0_mutex
call thread_wait
mov rbx,thread1_mutex
call thread_wait
mov rbx,thread2_mutex
call thread_wait
mov rbx,thread3_mutex
call thread_wait
call image_save
ret
;==============================================================================
align 16
start:
; Entry point named by the 'entry' directive: runs main, then ends the
; process with exit code 0.
;------------------------------------------------------------------------------
call main
xor edi,edi ; exit code 0
mov eax,60 ; sys_exit
syscall
;==============================================================================
segment readable writeable

; One completion flag per worker: 0 while running, set to 1 by the worker
; when it is done (see thread_create / thread_wait).
align 4
thread0_mutex dd 0
thread1_mutex dd 0
thread2_mutex dd 0
thread3_mutex dd 0

align 8
image_ptr dq 0 ; address of the pixel buffer, stored by main
align 4
image_tile dd 0 ; index of the next tile to hand out, shared by all workers

; Assembly-time constants. IMAGE_SIZE is used before it is defined here;
; presumably the assembler resolves that in a later pass -- confirm.
TILE_SIZE = 64
TILE_X_COUNT = IMAGE_SIZE / TILE_SIZE
TILE_Y_COUNT = IMAGE_SIZE / TILE_SIZE
TILE_COUNT = TILE_X_COUNT * TILE_Y_COUNT
IMAGE_SIZE = 1024

; Float copy of IMAGE_SIZE in all 8 lanes; has to be kept in sync by hand.
align 32
image_size: dd 8 dup 1024.0
;==============================================================================

--

7-77-777
Evil Sinner!
to weak you should be meek, and you should brainfuck stronger
https://github.com/rofl0r/chaos-pp

Branimir Maksimovic <branimir.maksimovic@icloud.com>: Oct 03 09:07PM

> Humm... Can you see any of my work on YouTube? Here is my channel:

> https://www.youtube.com/channel/UC_DhsJu-AbQ6Msnxdf8z6Kg

> Does that link work for you?

Bookmarked, will enjoy, THANKS!

--

7-77-777
Evil Sinner!
to weak you should be meek, and you should brainfuck stronger
https://github.com/rofl0r/chaos-pp

"Chris M. Thomasson" <chris.m.thomasson.1@gmail.com>: Oct 03 02:33PM -0700

On 10/3/2021 2:06 PM, Branimir Maksimovic wrote:
> https://www.icloud.com/iclouddrive/0sLxM6sxtszTkGEX_3YwH57bA#mandelbrot
> proggy:
> format ELF64 executable 3
[...]

Definite field lines. Not sure if it's using binary decomposition.
Actually, check these out. It's from a field line algorithm I created for
escape time fractals:

https://fractalforums.org/fractal-mathematics-and-new-theories/28/plotting-field-lines-during-iteration/4233

It was mentioned here:

https://en.wikibooks.org/wiki/Fractals/Iterations_in_the_complex_plane/MandelbrotSetExteriorComplex_potential

Search for my last name in the page:

http://www.fractalforums.com/new-theories-and-research/plotting-field-lines-during-iteration/

"Chris M. Thomasson" <chris.m.thomasson.1@gmail.com>: Oct 03 02:51PM -0700

On 10/3/2021 2:07 PM, Branimir Maksimovic wrote:

>> https://www.youtube.com/channel/UC_DhsJu-AbQ6Msnxdf8z6Kg

>> Does that link work for you?

> Bookmarked, will enjoy, THANKS!

No problem. Glad you can see them. Also, fwiw, here is my vector field
rendered in real time using a GLSL shader:

https://www.shadertoy.com/view/MdcyRs

You can press and drag a point around in the animation to create an
attractor in the field. Also, here is my field using WebGL:

http://fractallife247.com/test/webgl/

Does it work for you? Thanks.

How to get mantissa of long double?

Branimir Maksimovic <branimir.maksimovic@icloud.com>: Oct 03 12:45AM

> It would be safer and more portable to use FType; there's no portable
> guarantee that any integer type is large enough to hold the mantissa,
> but FType is.

How so if FType is just typedef?

--

7-77-777
Evil Sinner!
to weak you should be meek, and you should brainfuck stronger
https://github.com/rofl0r/chaos-pp

Branimir Maksimovic <branimir.maksimovic@icloud.com>: Oct 03 12:47AM

> that definition might result in the mantissa of "large" having fewer
> significant digits than it has on mine, but that's not particularly
> important.

Now outputs:
bmaxa@Branimirs-Air projects % ./a.out
max: 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.000000
large: 120645172288001372126619919515947355653853135095820093742269387851763173210613194516902557021349374744036403756647462602101944733127725809392434319981526284828181273773403605513307431121904981243948320057805774787281290926848565000291282225047507294705767371139696722108291516984908752371556782562287095382016.000000
middling: 2882400001.137778
maxll: 9223372036854775807
largell: 9223372036854775807
middlingll: 2882400001
max: 0x1.fffffffffffffp+1023
large: 0x1.579bde02468acp+1023
middling: 0x1.579bde02468adp+31
maxll: 0x7fffffffffffffff
largell: 0x7fffffffffffffff
middlingll: 0xabcdef01

--

7-77-777
Evil Sinner!
to weak you should be meek, and you should brainfuck stronger
https://github.com/rofl0r/chaos-pp

Bonita Montero <Bonita.Montero@gmail.com>: Oct 03 08:09AM +0200

So, this should be the most elegant code:

#pragma once
#include <limits>
#include <cstdint>
#include <cassert>

/// Bit-level view of an IEEE 754 binary64 `double`: read and modify the sign,
/// the exponent and the mantissa individually.
///
/// Layout assumed throughout: bit 63 = sign, bits 52..62 = biased exponent,
/// bits 0..51 = stored mantissa.
///
/// NOTE(review): the anonymous union is written through `value` and read
/// through `binary` (and vice versa). That relies on the compiler supporting
/// union-based type punning -- confirm for every target toolchain.
struct dbl_parts
{
    static_assert(std::numeric_limits<double>::is_iec559, "must be standard fp");

    /// Wraps @p d (implicit conversion is intentional).
    dbl_parts( double d );
    /// Replaces the wrapped value with @p d.
    dbl_parts &operator =( double d );
    /// Leaves the wrapped value uninitialized.
    dbl_parts() = default;
    /// @return the wrapped value.
    operator double() const;
    /// @return true when the sign bit is set.
    bool getSign() const;
    /// @return the raw 11-bit exponent field (0 .. 0x7FF).
    std::uint16_t getBiasedExponent() const;
    /// @return the exponent field with the bias (0x3FF) removed.
    std::int16_t getExponent() const;
    /// @return the 52 stored mantissa bits; bit 52 (the implicit leading one)
    ///         is added when the exponent field is neither 0 nor 0x7FF.
    std::uint64_t getMantissa() const;
    /// Sets (true) or clears (false) the sign bit.
    void setSign( bool sign );
    /// Stores @p exp in the exponent field; asserts exp <= 0x7FF.
    void setBiasedExponent( std::uint16_t exp );
    /// Stores @p exp plus the bias in the exponent field.
    void setExponent( std::int16_t exp );
    void setMantissa( std::uint64_t mantissa );
private:
    static constexpr unsigned MANTISSA_BITS = 52;
    static constexpr unsigned SIGN_BIT = 63;
    using i64 = std::int64_t;
    using ui64 = std::uint64_t;
    static constexpr ui64
        SIGN_MASK = static_cast<ui64>(1) << SIGN_BIT,
        EXP_MASK = static_cast<ui64>(0x7FF) << MANTISSA_BITS,
        // The low 52 bits set.
        MANTISSA_MASK = (static_cast<ui64>(1) << MANTISSA_BITS) - 1,
        MANTISSA_MAX = (static_cast<ui64>(1) << MANTISSA_BITS) | MANTISSA_MASK;
    using ui16 = std::uint16_t;
    using i16 = std::int16_t;
    static constexpr ui16
        BEXP_DENORMAL = 0,
        BEXP_BASE = 0x3FF,
        BEXP_MAX = 0x7FF;
    static constexpr i16
        EXP_MIN = static_cast<i16>(0 - BEXP_BASE),
        EXP_MAX = static_cast<i16>(BEXP_MAX - BEXP_BASE);
    union
    {
        double value;
        ui64 binary;
    };
};

inline
dbl_parts::dbl_parts( double d ) :
    value( d )
{
}

inline
dbl_parts &dbl_parts::operator =( double d )
{
    value = d;
    return *this;
}

inline
dbl_parts::operator double() const
{
    return value;
}

inline
bool dbl_parts::getSign() const
{
    return (binary & SIGN_MASK) != 0;
}

inline
std::uint16_t dbl_parts::getBiasedExponent() const
{
    // Truncating to 16 bits keeps sign + exponent; the mask drops the sign.
    return static_cast<ui16>(static_cast<ui16>(binary >> MANTISSA_BITS) & BEXP_MAX);
}

inline
std::int16_t dbl_parts::getExponent() const
{
    return static_cast<i16>(getBiasedExponent() - BEXP_BASE);
}

inline
std::uint64_t dbl_parts::getMantissa() const
{
    ui16 const bExp = getBiasedExponent();
    // The implicit leading one is present unless the exponent field is 0 or 0x7FF.
    bool const hasHiddenBit = bExp != BEXP_DENORMAL && bExp != BEXP_MAX;
    ui64 const hiBit = static_cast<ui64>(hasHiddenBit) << MANTISSA_BITS;
    return (binary & MANTISSA_MASK) | hiBit;
}

inline
void dbl_parts::setSign( bool sign )
{
    binary = (binary & ~SIGN_MASK) | (static_cast<ui64>(sign) << SIGN_BIT);
}

inline
void dbl_parts::setBiasedExponent( std::uint16_t exp )
{
    assert(exp <= BEXP_MAX);
    binary = (binary & (SIGN_MASK | MANTISSA_MASK)) | (static_cast<ui64>(exp) << MANTISSA_BITS);
}

inline
void dbl_parts::setExponent( std::int16_t exp )
{
    // Out-of-range inputs wrap to a large unsigned value and are caught by
    // the assertion in setBiasedExponent().
    setBiasedExponent( static_cast<ui16>(exp + BEXP_BASE) );
}

inline
void dbl_parts::setMantissa( std::uint64_t mantissa )
{
#if !defined(NDEBUG)
ui64 mantissaMax = MANTISSA_MASK | (ui64)(getBiasedExponent() !=
BEXP_DENORMAL && getBiasedExponent() != BEXP_MAX) << MANTISSA_BITS;
assert(mantissa <= mantissaMax);

soft and program

Sunday, October 3, 2021

Digest for comp.lang.c++@googlegroups.com - 16 updates in 3 topics

No comments:

Blog Archive

About Me