diff --git a/XSUB.h b/XSUB.h index 9b7e98f64c56..9ff6f5222481 100644 --- a/XSUB.h +++ b/XSUB.h @@ -603,10 +603,12 @@ Rethrows a previously caught exception. See L. # define signal PerlProc_signal # define getpid PerlProc_getpid # define gettimeofday PerlProc_gettimeofday +#ifndef PERL_MY_HOST_NET_BYTE_SWAP # define htonl PerlSock_htonl # define htons PerlSock_htons # define ntohl PerlSock_ntohl # define ntohs PerlSock_ntohs +#endif # define accept PerlSock_accept # define bind PerlSock_bind # define connect PerlSock_connect diff --git a/embed.fnc b/embed.fnc index 762f47f06c63..98f2f5cd5723 100644 --- a/embed.fnc +++ b/embed.fnc @@ -6468,7 +6468,13 @@ p |bool |get_win32_message_utf8ness \ |NULLOK const char *string Teor |void |win32_croak_not_implemented \ |NN const char *fname -#else +# if !defined(PERL_MY_HOST_NET_BYTE_SWAP) +DTbo |u_long |win32_htonl |u_long hostlong +DTbo |u_short|win32_htons |u_short hostshort +DTbo |u_long |win32_ntohl |u_long netlong +DTbo |u_short|win32_ntohs |u_short netshort +# endif +#else /* if !defined(WIN32) */ p |bool |do_exec3 |NN const char *incmd \ |int fd \ |int do_report diff --git a/iperlsys.h b/iperlsys.h index 8db70506019f..0b6951b41464 100644 --- a/iperlsys.h +++ b/iperlsys.h @@ -1144,10 +1144,12 @@ struct IPerlProcInfo /* PerlSock */ struct IPerlSock; struct IPerlSockInfo; +#ifndef PERL_MY_HOST_NET_BYTE_SWAP typedef u_long (*LPHtonl)(const struct IPerlSock**, u_long); typedef u_short (*LPHtons)(const struct IPerlSock**, u_short); typedef u_long (*LPNtohl)(const struct IPerlSock**, u_long); typedef u_short (*LPNtohs)(const struct IPerlSock**, u_short); +#endif typedef SOCKET (*LPAccept)(const struct IPerlSock**, SOCKET, struct sockaddr*, int*); typedef int (*LPBind)(const struct IPerlSock**, SOCKET, @@ -1208,10 +1210,12 @@ typedef int (*LPClosesocket)(const struct IPerlSock**, SOCKET s); struct IPerlSock { +#ifndef PERL_MY_HOST_NET_BYTE_SWAP LPHtonl pHtonl; LPHtons pHtons; LPNtohl pNtohl; LPNtohs pNtohs; +#endif LPAccept pAccept; LPBind pBind; LPConnect pConnect; @@ -1262,6 +1266,19 @@ struct IPerlSockInfo struct IPerlSock perlSockList; }; +#ifdef PERL_MY_HOST_NET_BYTE_SWAP + /* perl.h has provides a much more efficient inlined implementation of + htonl(), htons(), ntohl(), ntohs() compared to the "native" exported + extern linkage functions exported by ws2_32.dll. Both Mingw GCCs + and MSVCs headers, only offer exports from ws2_32.dll for those 4 + tokens, without any alternative. Writing "r = htonl(n);" a C Windows + app is an improper anti-pattern. The official, correct, identifier is + RtlUlongByteSwap() on the Windows Platform. */ +# define PerlSock_htonl(x) htonl(x) +# define PerlSock_htons(x) htons(x) +# define PerlSock_ntohl(x) ntohl(x) +# define PerlSock_ntohs(x) ntohs(x) +#else # define PerlSock_htonl(x) \ ((*(PL_Sock))->pHtonl)(PL_Sock, x) # define PerlSock_htons(x) \ @@ -1270,6 +1287,8 @@ struct IPerlSockInfo ((*(PL_Sock))->pNtohl)(PL_Sock, x) # define PerlSock_ntohs(x) \ ((*(PL_Sock))->pNtohs)(PL_Sock, x) +#endif + # define PerlSock_accept(s, a, l) \ ((*(PL_Sock))->pAccept)(PL_Sock, s, a, l) # define PerlSock_bind(s, n, l) \ diff --git a/mathoms.c b/mathoms.c index fc7db080934e..cec79cdba807 100644 --- a/mathoms.c +++ b/mathoms.c @@ -883,6 +883,57 @@ Perl_utf8_to_uvchr(pTHX_ const U8 *s, STRLEN *retlen) return utf8_to_uvchr_buf(s, s + UTF8_CHK_SKIP(s), retlen); } +/* These 4 exported functions are unused/deprecated/mathoms. WinPerl hooks + and replaces the htonl(), ntohl(), etc symbols provided by Mingw GCC and + MSVC headers, which are exported extern "C" functions from ws2_32.dll, + with 1 CPU instruction big, inline intrinsics. ws2_32.dll's implementation + is 11-15 instructions long. Clang and GCC for WinOS correctly convert + expression "& << |" to 1 CPU instruction, MSVC build numbers released prior + to Fall 2023 don't do that 1 CPU opcode optimization. + Because of frozen public API src code compatibility and object code linker + reasons, neither (Mingw or MS SDK) Clang, Mingw GCC or MSVC compilers can + correct the day 1 1993 mistake that tokens htonl(), ntohl(), etc, are + external linkage symbols from ws2_32.dll, + + cpangrep shows exactly 1 module uses WinPerl's win32_*() prefixed sockets API + byte order swappers. + + cpangrep: win32_htonl|win32_htons|win32_ntohl|win32_ntohs + https://metacpan.org/release/Prima/source/win32/files.c#L750 + + For now, these symbols are still exported, incase they are linked by a + XS .dll, that has a TU/.o/.c that doesn't #include "perl.h" and + declared function win32_htonl() themselves or they are using GetProcAddress(). */ + +#undef win32_htonl +#undef win32_htons +#undef win32_ntohl +#undef win32_ntohs + +u_long +win32_htonl(u_long hostlong) +{ + return htonl(hostlong); +} + +u_short +win32_htons(u_short hostshort) +{ + return htons(hostshort); +} + +u_long +win32_ntohl(u_long netlong) +{ + return ntohl(netlong); +} + +u_short +win32_ntohs(u_short netshort) +{ + return ntohs(netshort); +} + GCC_DIAG_RESTORE #endif /* NO_MATHOMS */ diff --git a/perl.h b/perl.h index 5aaeea0cc223..8093c9cb2a36 100644 --- a/perl.h +++ b/perl.h @@ -4571,23 +4571,61 @@ struct ptr_tbl { struct ptr_tbl_ent *tbl_arena_end; }; -#if defined(htonl) && !defined(HAS_HTONL) -#define HAS_HTONL -#endif -#if defined(htons) && !defined(HAS_HTONS) -#define HAS_HTONS -#endif -#if defined(ntohl) && !defined(HAS_NTOHL) -#define HAS_NTOHL -#endif -#if defined(ntohs) && !defined(HAS_NTOHS) -#define HAS_NTOHS +/* Override the C compiler's built in byte order swapping functions and + implement our own BE/LE conversion functions. All modern CCs on all CPU archs + should be using an inline intrinsic, that maps to 1 CPU instructions, max 3. + Worst possible correct and incompetent implementation a C compiler can do: + + libperl calls symbol htonl() through PLT/GOT to libc, libc then declares + stack 2 x unsigned char buf[4]; plus 2 calls through PLT/GOT to symbol + memcpy(); plus for( i=0; i<4; i++) {} loop. + + Before defining PERL_MY_HOST_NET_BYTE_SWAP, remember to read the -O1 or -O2 + optimized assembly code created by a production grade C compiler, to see + if there actually is a defect/flaw or not with the permutation of CC/CPU/OS + you are using. Don't accidentally turn 1 CPU op into 6 CPU ops, and document + that change as an "optimization". + + The only known flawed CC is MSVC all versions and build numbers of + cl.exe/link.exe released to the pubic before Fall 2023. It has a 1 of 5 + stars, if C token "htonl()" is used (perl uses Win32's PLT/GOT). + 2 of 5 stars if "& << |" expression is used. Build numbers of MSVC released + during or after Fall 2023, produce correct and perfect machine code identical + to what GCC/Clang would emit. */ + +#ifdef PERL_MY_HOST_NET_BYTE_SWAP +# undef htonl +# undef HAS_HTONL +# undef htons +# undef HAS_HTONS +# undef ntohl +# undef HAS_NTOHL +# undef ntohl +# undef HAS_NTOHL +# undef ntohs +# undef HAS_NTOHS +# undef htonll +# undef HAS_HTONLL +#else +# if defined(htonl) && !defined(HAS_HTONL) +# define HAS_HTONL +# endif +# if defined(htons) && !defined(HAS_HTONS) +# define HAS_HTONS +# endif +# if defined(ntohl) && !defined(HAS_NTOHL) +# define HAS_NTOHL +# endif +# if defined(ntohs) && !defined(HAS_NTOHS) +# define HAS_NTOHS +# endif #endif + #ifndef HAS_HTONL -#define HAS_HTONS -#define HAS_HTONL -#define HAS_NTOHS -#define HAS_NTOHL +# define HAS_HTONS +# define HAS_HTONL +# define HAS_NTOHS +# define HAS_NTOHL # if (BYTEORDER & 0xffff) == 0x4321 /* Big endian system, so ntohl, ntohs, htonl and htons do not need to re-order their values. However, to behave identically to the alternative @@ -4604,21 +4642,65 @@ struct ptr_tbl { that *declare* the various functions are still seen. If we declare our own htonl etc they will clash with the declarations in the Win32 headers. */ +# ifdef _MSC_VER +# pragma intrinsic(_byteswap_ulong) +# pragma intrinsic(_byteswap_ushort) +# endif + +# if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING)) PERL_STATIC_INLINE U32 my_swap32(const U32 x) { +# ifdef _MSC_VER + return _byteswap_ulong(x); +# else return ((x & 0xFF) << 24) | ((x >> 24) & 0xFF) | ((x & 0x0000FF00) << 8) | ((x & 0x00FF0000) >> 8); +# endif } PERL_STATIC_INLINE U16 my_swap16(const U16 x) { + +# ifdef _MSC_VER + return _byteswap_ushort(x); +# else return ((x & 0xFF) << 8) | ((x >> 8) & 0xFF); +# endif } +# endif + +/* all CCs except MSVC use the static inlines above, unoptimized MSVC Perl + built with -DDEBUGGING, also uses the statics above, to make single stepping + and breakpoints easier to use. MSVC's C/C++ front end parser does not + recogize the traditional "& << |" expression as a synonym for i386/x64/ARM's + byteswap CPU instruction. This MSVC bug was fixed in MSVC 2022 build number + 19.37/17.7 released Aug 8 2023. All MSVC 2022 build numbers <= 19.36/17.6 + have the bug. Explicitly tell MSVC to use the byte swap opcode solves the + problem. VC's _byteswap_ulong() is declared as an intrinsic function. + It will not cause multi-eval problems the way a macro would. So skip the + my_swap() wrappers. + + TODO: add special casing for __bswap_32(), __builtin_bswap32(), + bswap_32(), cpu_to_be32(), swap32(), read_be32(), write_be32(), htobe32(), + OSSwapInt32(), and on April 1st, impliment RtlUlongByteSwap(). Remember to + the history and HW compatibility of each of these CC tokens before adding + them. If hello_world.c executes and the Desktop GUI works, the defaults + in Configure can't produce a binary that SIGILLs on 1 of 2 systems, + made 2 years apart, running the same OS version, after a copy paste or + cloud server deployment of a /usr/bin/perl file. +*/ -# define htonl(x) my_swap32(x) -# define ntohl(x) my_swap32(x) -# define ntohs(x) my_swap16(x) -# define htons(x) my_swap16(x) +# if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING)) +# define htonl(x) my_swap32(x) +# define ntohl(x) my_swap32(x) +# define ntohs(x) my_swap16(x) +# define htons(x) my_swap16(x) +# else +# define htonl(x) _byteswap_ulong(x) +# define ntohl(x) _byteswap_ulong(x) +# define ntohs(x) _byteswap_ushort(x) +# define htons(x) _byteswap_ushort(x) +# endif # else # error "Unsupported byteorder" /* The C pre-processor doesn't let us return the value of BYTEORDER as part of @@ -4635,6 +4717,51 @@ my_swap16(const U16 x) { # endif #endif +#if defined(htonll) && !defined(HAS_HTONLL) +# define HAS_HTONLL +#endif + +#ifndef HAS_HTONLL +# define HAS_HTONLL +# if (BYTEORDER & 0xffff) == 0x4321 +# define ntohll(x) ((x)&0xFFFFFFFFFFFFFFFF) +# define htonll(x) ntohll(x) +# elif BYTEORDER == 0x1234 || BYTEORDER == 0x12345678 +# ifdef _MSC_VER +# pragma intrinsic(_byteswap_uint64) +# endif + +# if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING)) +PERL_STATIC_INLINE U64 +my_swap64(const U64 x) { +# ifdef _MSC_VER + return _byteswap_uint64(x); +# else +/* return ( ((x & 0xff00000000000000) >> 56) | ((x & 0x00ff000000000000) >> 40) + | ((x & 0x0000ff0000000000) >> 24) | ((x & 0x000000ff00000000) >> 8) + | ((x & 0x00000000ff000000) << 8) | ((x & 0x0000000000ff0000) << 24) + | ((x & 0x000000000000ff00) << 40) | ((x & 0x00000000000000ff) << 56)); + return ((U64)htonl(x & 0xFFFFFFFF) << 32) | htonl(x >> 32); */ + U64 r; + r = (x & 0x00000000FFFFFFFF) << 32 | (x & 0xFFFFFFFF00000000) >> 32; + r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16; + r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8; + return r; +# endif +} +# endif +# if !defined(_MSC_VER) || (defined(_MSC_VER) && defined(DEBUGGING)) +# define htonll(x) my_swap64(x) +# define ntohll(x) my_swap64(x) +# else +# define htonll(x) _byteswap_uint64(x) +# define ntohll(x) _byteswap_uint64(x) +# endif +# else +# error "Unsupported byteorder" +# endif +#endif + /* * Little-endian byte order functions - 'v' for 'VAX', or 'reVerse'. * -DWS diff --git a/proto.h b/proto.h index d648766b4898..41bc376a0ce6 100644 --- a/proto.h +++ b/proto.h @@ -11044,6 +11044,31 @@ win32_croak_not_implemented(const char *fname) # define PERL_ARGS_ASSERT_WIN32_CROAK_NOT_IMPLEMENTED \ assert(fname) +# if !defined(PERL_MY_HOST_NET_BYTE_SWAP) + +# if !defined(NO_MATHOMS) +PERL_CALLCONV u_long +win32_htonl(u_long hostlong) + __attribute__deprecated__; +# define PERL_ARGS_ASSERT_WIN32_HTONL + +PERL_CALLCONV u_short +win32_htons(u_short hostshort) + __attribute__deprecated__; +# define PERL_ARGS_ASSERT_WIN32_HTONS + +PERL_CALLCONV u_long +win32_ntohl(u_long netlong) + __attribute__deprecated__; +# define PERL_ARGS_ASSERT_WIN32_NTOHL + +PERL_CALLCONV u_short +win32_ntohs(u_short netshort) + __attribute__deprecated__; +# define PERL_ARGS_ASSERT_WIN32_NTOHS + +# endif /* !defined(NO_MATHOMS) */ +# endif /* !defined(PERL_MY_HOST_NET_BYTE_SWAP) */ #else /* if !defined(WIN32) */ PERL_CALLCONV bool Perl_do_exec3(pTHX_ const char *incmd, int fd, int do_report) diff --git a/win32/include/sys/socket.h b/win32/include/sys/socket.h index 185e7ddab2df..9389eca4380c 100644 --- a/win32/include/sys/socket.h +++ b/win32/include/sys/socket.h @@ -62,13 +62,17 @@ int win32_ioctlsocket (SOCKET s, long cmd, u_long *argp); int win32_getpeername (SOCKET s, struct sockaddr *name, int * namelen); int win32_getsockname (SOCKET s, struct sockaddr *name, int * namelen); int win32_getsockopt (SOCKET s, int level, int optname, char * optval, int *optlen); +#ifndef PERL_MY_HOST_NET_BYTE_SWAP u_long win32_htonl (u_long hostlong); u_short win32_htons (u_short hostshort); +#endif unsigned long win32_inet_addr (const char * cp); char * win32_inet_ntoa (struct in_addr in); int win32_listen (SOCKET s, int backlog); +#ifndef PERL_MY_HOST_NET_BYTE_SWAP u_long win32_ntohl (u_long netlong); u_short win32_ntohs (u_short netshort); +#endif int win32_recv (SOCKET s, char * buf, int len, int flags); int win32_recvfrom (SOCKET s, char * buf, int len, int flags, struct sockaddr *from, int * fromlen); @@ -109,10 +113,33 @@ void win32_endservent(void); /* direct to our version */ -#define htonl win32_htonl -#define htons win32_htons -#define ntohl win32_ntohl -#define ntohs win32_ntohs +#ifndef PERL_MY_HOST_NET_BYTE_SWAP + +/* Because of hysterical raisins involving Trumpet Winsock, force the POSIX + name, to redirect into perl5XX.dll, which goes through [unimplimented/NOOP] + iperlsys.h/CPerlHost emulation on threaded WinPerls, which then redirects to + ws2_32.dll's implementation. + + No-thread WinPerl immediatly redirects to ws2_32.dll's implementation. */ +# define htonl win32_htonl +# define htons win32_htons +# define ntohl win32_ntohl +# define ntohs win32_ntohs +#else +/* These 4 win32_*() prefixed byte swap functions are macros + if #include "perl.h" is done in a TU. A manual function declaration in + a Perl XS unaware TU/.c file, that is linked with another perl aware .xs TU. + Then both TUs are linked into a XSUB/DynaLoader/EU::PXS .dll, is the + theoretical BBC risk. Hence if a TU does #include "perl.h" they get the + macro, if the TU is Perl XS unaware but manually declared these byte swappers + that TU will wind up at the ws2_32.dll exported implementation. */ + +# define win32_htonl htonl /* redirect to perl.h's very fast impl */ +# define win32_htons htons +# define win32_ntohl ntohl +# define win32_ntohs ntohs +#endif + #define inet_addr win32_inet_addr #define inet_ntoa win32_inet_ntoa diff --git a/win32/perlhost.h b/win32/perlhost.h index af5e320afae4..ee7b42a16eb9 100644 --- a/win32/perlhost.h +++ b/win32/perlhost.h @@ -1291,6 +1291,9 @@ const struct IPerlDir perlDir = /* IPerlSock */ + +#ifndef PERL_MY_HOST_NET_BYTE_SWAP + u_long PerlSockHtonl(const struct IPerlSock** piPerl, u_long hostlong) { @@ -1319,6 +1322,8 @@ PerlSockNtohs(const struct IPerlSock** piPerl, u_short netshort) return win32_ntohs(netshort); } +#endif + SOCKET PerlSockAccept(const struct IPerlSock** piPerl, SOCKET s, struct sockaddr* addr, int* addrlen) { PERL_UNUSED_ARG(piPerl); @@ -1607,10 +1612,12 @@ PerlSockIoctlsocket(const struct IPerlSock** piPerl, SOCKET s, long cmd, u_long const struct IPerlSock perlSock = { +#ifndef PERL_MY_HOST_NET_BYTE_SWAP PerlSockHtonl, PerlSockHtons, PerlSockNtohl, PerlSockNtohs, +#endif PerlSockAccept, PerlSockBind, PerlSockConnect, diff --git a/win32/win32.h b/win32/win32.h index 1b69d153c1f7..d6c3f811960a 100644 --- a/win32/win32.h +++ b/win32/win32.h @@ -25,6 +25,17 @@ # define PERL_TEXTMODE_SCRIPTS #endif +/* Don't use ws2_32.dll's extern "C" implementation of these 4 tokens. + perl.h's implementation of these is just a 1 CPU instruction big intrinsic. + ws2_32.dll's implementation is 13 CPU instructions long. Perl_pp_pack() and + Perl_pp_unpack() have no good rational, to transfer control flow to a TCPIP + driver. */ +#undef HAS_NTOHL +#undef HAS_HTONL +#undef HAS_HTONS +#undef HAS_NTOHS +#define PERL_MY_HOST_NET_BYTE_SWAP + #if defined(PERL_IMPLICIT_SYS) # define DYNAMIC_ENV_FETCH # define HAS_GETENV_LEN diff --git a/win32/win32sck.c b/win32/win32sck.c index 7289a47d9b0b..2b8f29f8c1cf 100644 --- a/win32/win32sck.c +++ b/win32/win32sck.c @@ -316,32 +316,6 @@ convert_errno_to_wsa_error(int err) } #endif /* ERRNO_HAS_POSIX_SUPPLEMENT */ -u_long -win32_htonl(u_long hostlong) -{ - return htonl(hostlong); -} - -u_short -win32_htons(u_short hostshort) -{ - return htons(hostshort); -} - -u_long -win32_ntohl(u_long netlong) -{ - return ntohl(netlong); -} - -u_short -win32_ntohs(u_short netshort) -{ - return ntohs(netshort); -} - - - SOCKET win32_accept(SOCKET s, struct sockaddr *addr, int *addrlen) {