Skip to content

Commit 6cc21c4

Browse files
nielsdos and divinity76 committed
Optimize pack()
Instead of using lookup tables, we can use a combination of shifts and byte swapping to achieve the same thing in fewer cycles and with less code.

Benchmark files
---------------

pack1.php:
```php
for ($i = 0; $i < 10_000_000; ++$i) {
    pack("J", 0x7FFFFFFFFFFFFFFF);
}
```

pack2.php:
```php
for ($i = 0; $i < 4000000; ++$i) {
    pack("nvc*", 0x1234, 0x5678, 65, 66);
}
```

On an i7-4790:
```
Benchmark 1: ./sapi/cli/php pack1.php
  Time (mean ± σ): 408.8 ms ± 3.4 ms [User: 406.1 ms, System: 1.6 ms]
  Range (min … max): 403.6 ms … 413.6 ms 10 runs

Benchmark 2: ./sapi/cli/php_old pack1.php
  Time (mean ± σ): 451.7 ms ± 7.7 ms [User: 448.5 ms, System: 2.0 ms]
  Range (min … max): 442.8 ms … 461.2 ms 10 runs

Summary
  ./sapi/cli/php pack1.php ran 1.11 ± 0.02 times faster than ./sapi/cli/php_old pack1.php

Benchmark 1: ./sapi/cli/php pack2.php
  Time (mean ± σ): 239.3 ms ± 6.0 ms [User: 236.2 ms, System: 2.3 ms]
  Range (min … max): 233.2 ms … 256.8 ms 12 runs

Benchmark 2: ./sapi/cli/php_old pack2.php
  Time (mean ± σ): 271.9 ms ± 3.3 ms [User: 269.7 ms, System: 1.3 ms]
  Range (min … max): 267.4 ms … 279.0 ms 11 runs

Summary
  ./sapi/cli/php pack2.php ran 1.14 ± 0.03 times faster than ./sapi/cli/php_old pack2.php
```

On an i7-1185G7:
```
Benchmark 1: ./sapi/cli/php pack1.php
  Time (mean ± σ): 263.7 ms ± 1.8 ms [User: 262.6 ms, System: 0.9 ms]
  Range (min … max): 261.5 ms … 268.2 ms 11 runs

Benchmark 2: ./sapi/cli/php_old pack1.php
  Time (mean ± σ): 303.3 ms ± 6.5 ms [User: 300.7 ms, System: 2.3 ms]
  Range (min … max): 297.4 ms … 318.1 ms 10 runs

Summary
  ./sapi/cli/php pack1.php ran 1.15 ± 0.03 times faster than ./sapi/cli/php_old pack1.php

Benchmark 1: ./sapi/cli/php pack2.php
  Time (mean ± σ): 156.7 ms ± 2.9 ms [User: 154.7 ms, System: 1.7 ms]
  Range (min … max): 151.6 ms … 164.7 ms 19 runs

Benchmark 2: ./sapi/cli/php_old pack2.php
  Time (mean ± σ): 174.6 ms ± 3.3 ms [User: 171.9 ms, System: 2.3 ms]
  Range (min … max): 170.7 ms … 180.4 ms 17 runs

Summary
  ./sapi/cli/php pack2.php ran 1.11 ± 0.03 times faster than ./sapi/cli/php_old pack2.php
```

Closes GH-18524.

Co-authored-by: divinity76 <[email protected]>
1 parent 45215d0 commit 6cc21c4

File tree

6 files changed

+47
-200
lines changed

6 files changed

+47
-200
lines changed

NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@ PHP NEWS
22
|||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||||
33
?? ??? ????, PHP 8.5.0alpha2
44

5+
- Standard:
6+
. Optimized pack(). (nielsdos, divinity76)
7+
58
- URI:
69
. Return the singleton UrlValidationErrorType instances from Uri\WhatWg\Url
710
instead of creating new objects that are different from the singleton.

UPGRADING

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -619,6 +619,7 @@ PHP 8.5 UPGRADE NOTES
619619
. Improved performance of urlencode() and rawurlencode().
620620
. Improved unpack() performance with nameless repetitions by avoiding
621621
creating temporary strings and reparsing them.
622+
. Improved pack() performance.
622623

623624
- XMLReader:
624625
. Improved property access performance.

ext/standard/basic_functions.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,6 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
299299

300300
BASIC_MINIT_SUBMODULE(var)
301301
BASIC_MINIT_SUBMODULE(file)
302-
BASIC_MINIT_SUBMODULE(pack)
303302
BASIC_MINIT_SUBMODULE(browscap)
304303
BASIC_MINIT_SUBMODULE(standard_filters)
305304
BASIC_MINIT_SUBMODULE(user_filters)

ext/standard/pack.c

Lines changed: 43 additions & 176 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include <stdlib.h>
2020
#include <errno.h>
2121
#include <sys/types.h>
22-
#include "pack.h"
2322

2423
#define INC_OUTPUTPOS(a,b) \
2524
if ((a) < 0 || ((INT_MAX - outputpos)/((int)b)) < (a)) { \
@@ -30,10 +29,23 @@
3029
} \
3130
outputpos += (a)*(b);
3231

32+
typedef enum {
33+
PHP_LITTLE_ENDIAN,
34+
PHP_BIG_ENDIAN,
35+
} php_pack_endianness;
36+
3337
#ifdef WORDS_BIGENDIAN
34-
#define MACHINE_LITTLE_ENDIAN 0
38+
# define MACHINE_LITTLE_ENDIAN 0
39+
# define PHP_MACHINE_ENDIAN PHP_BIG_ENDIAN
3540
#else
36-
#define MACHINE_LITTLE_ENDIAN 1
41+
# define MACHINE_LITTLE_ENDIAN 1
42+
# define PHP_MACHINE_ENDIAN PHP_LITTLE_ENDIAN
43+
#endif
44+
45+
#ifdef ZEND_ENABLE_ZVAL_LONG64
46+
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP64(u)
47+
#else
48+
# define PHP_LONG_BSWAP(u) ZEND_BYTES_SWAP32(u)
3749
#endif
3850

3951
typedef ZEND_SET_ALIGNED(1, uint16_t unaligned_uint16_t);
@@ -42,41 +54,23 @@ typedef ZEND_SET_ALIGNED(1, uint64_t unaligned_uint64_t);
4254
typedef ZEND_SET_ALIGNED(1, unsigned int unaligned_uint);
4355
typedef ZEND_SET_ALIGNED(1, int unaligned_int);
4456

45-
/* Mapping of byte from char (8bit) to long for machine endian */
46-
static int byte_map[1];
47-
48-
/* Mappings of bytes from int (machine dependent) to int for machine endian */
49-
static int int_map[sizeof(int)];
50-
51-
/* Mappings of bytes from shorts (16bit) for all endian environments */
52-
static int machine_endian_short_map[2];
53-
static int big_endian_short_map[2];
54-
static int little_endian_short_map[2];
55-
56-
/* Mappings of bytes from longs (32bit) for all endian environments */
57-
static int machine_endian_long_map[4];
58-
static int big_endian_long_map[4];
59-
static int little_endian_long_map[4];
60-
61-
#if SIZEOF_ZEND_LONG > 4
62-
/* Mappings of bytes from quads (64bit) for all endian environments */
63-
static int machine_endian_longlong_map[8];
64-
static int big_endian_longlong_map[8];
65-
static int little_endian_longlong_map[8];
66-
#endif
67-
6857
/* {{{ php_pack */
69-
static void php_pack(zval *val, size_t size, int *map, char *output)
58+
static void php_pack(const zval *val, size_t size, php_pack_endianness endianness, char *output)
7059
{
71-
size_t i;
72-
char *v;
73-
74-
convert_to_long(val);
75-
v = (char *) &Z_LVAL_P(val);
60+
zend_ulong zl = zval_get_long(val);
7661

77-
for (i = 0; i < size; i++) {
78-
*output++ = v[map[i]];
62+
if ((endianness == PHP_LITTLE_ENDIAN) != MACHINE_LITTLE_ENDIAN) {
63+
zl = PHP_LONG_BSWAP(zl);
64+
#if MACHINE_LITTLE_ENDIAN
65+
zl >>= (sizeof(zl) - size) * 8;
66+
#endif
67+
} else {
68+
#if !MACHINE_LITTLE_ENDIAN
69+
zl <<= (sizeof(zl) - size) * 8;
70+
#endif
7971
}
72+
73+
memcpy(output, (const char *) &zl, size);
8074
}
8175
/* }}} */
8276

@@ -88,10 +82,7 @@ ZEND_ATTRIBUTE_CONST static inline uint16_t php_pack_reverse_int16(uint16_t arg)
8882
/* {{{ php_pack_reverse_int32 */
8983
ZEND_ATTRIBUTE_CONST static inline uint32_t php_pack_reverse_int32(uint32_t arg)
9084
{
91-
uint32_t result;
92-
result = ((arg & 0xFF) << 24) | ((arg & 0xFF00) << 8) | ((arg >> 8) & 0xFF00) | ((arg >> 24) & 0xFF);
93-
94-
return result;
85+
return ZEND_BYTES_SWAP32(arg);
9586
}
9687
/* }}} */
9788

@@ -509,7 +500,7 @@ PHP_FUNCTION(pack)
509500
case 'c':
510501
case 'C':
511502
while (arg-- > 0) {
512-
php_pack(&argv[currentarg++], 1, byte_map, &ZSTR_VAL(output)[outputpos]);
503+
php_pack(&argv[currentarg++], 1, PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
513504
outputpos++;
514505
}
515506
break;
@@ -518,16 +509,16 @@ PHP_FUNCTION(pack)
518509
case 'S':
519510
case 'n':
520511
case 'v': {
521-
int *map = machine_endian_short_map;
512+
php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
522513

523514
if (code == 'n') {
524-
map = big_endian_short_map;
515+
endianness = PHP_BIG_ENDIAN;
525516
} else if (code == 'v') {
526-
map = little_endian_short_map;
517+
endianness = PHP_LITTLE_ENDIAN;
527518
}
528519

529520
while (arg-- > 0) {
530-
php_pack(&argv[currentarg++], 2, map, &ZSTR_VAL(output)[outputpos]);
521+
php_pack(&argv[currentarg++], 2, endianness, &ZSTR_VAL(output)[outputpos]);
531522
outputpos += 2;
532523
}
533524
break;
@@ -536,7 +527,7 @@ PHP_FUNCTION(pack)
536527
case 'i':
537528
case 'I':
538529
while (arg-- > 0) {
539-
php_pack(&argv[currentarg++], sizeof(int), int_map, &ZSTR_VAL(output)[outputpos]);
530+
php_pack(&argv[currentarg++], sizeof(int), PHP_MACHINE_ENDIAN, &ZSTR_VAL(output)[outputpos]);
540531
outputpos += sizeof(int);
541532
}
542533
break;
@@ -545,16 +536,16 @@ PHP_FUNCTION(pack)
545536
case 'L':
546537
case 'N':
547538
case 'V': {
548-
int *map = machine_endian_long_map;
539+
php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
549540

550541
if (code == 'N') {
551-
map = big_endian_long_map;
542+
endianness = PHP_BIG_ENDIAN;
552543
} else if (code == 'V') {
553-
map = little_endian_long_map;
544+
endianness = PHP_LITTLE_ENDIAN;
554545
}
555546

556547
while (arg-- > 0) {
557-
php_pack(&argv[currentarg++], 4, map, &ZSTR_VAL(output)[outputpos]);
548+
php_pack(&argv[currentarg++], 4, endianness, &ZSTR_VAL(output)[outputpos]);
558549
outputpos += 4;
559550
}
560551
break;
@@ -565,16 +556,16 @@ PHP_FUNCTION(pack)
565556
case 'Q':
566557
case 'J':
567558
case 'P': {
568-
int *map = machine_endian_longlong_map;
559+
php_pack_endianness endianness = PHP_MACHINE_ENDIAN;
569560

570561
if (code == 'J') {
571-
map = big_endian_longlong_map;
562+
endianness = PHP_BIG_ENDIAN;
572563
} else if (code == 'P') {
573-
map = little_endian_longlong_map;
564+
endianness = PHP_LITTLE_ENDIAN;
574565
}
575566

576567
while (arg-- > 0) {
577-
php_pack(&argv[currentarg++], 8, map, &ZSTR_VAL(output)[outputpos]);
568+
php_pack(&argv[currentarg++], 8, endianness, &ZSTR_VAL(output)[outputpos]);
578569
outputpos += 8;
579570
}
580571
break;
@@ -1178,127 +1169,3 @@ PHP_FUNCTION(unpack)
11781169
}
11791170
}
11801171
/* }}} */
1181-
1182-
/* {{{ PHP_MINIT_FUNCTION */
1183-
PHP_MINIT_FUNCTION(pack)
1184-
{
1185-
int i;
1186-
1187-
if (MACHINE_LITTLE_ENDIAN) {
1188-
/* Where to get lo to hi bytes from */
1189-
byte_map[0] = 0;
1190-
1191-
for (i = 0; i < (int)sizeof(int); i++) {
1192-
int_map[i] = i;
1193-
}
1194-
1195-
machine_endian_short_map[0] = 0;
1196-
machine_endian_short_map[1] = 1;
1197-
big_endian_short_map[0] = 1;
1198-
big_endian_short_map[1] = 0;
1199-
little_endian_short_map[0] = 0;
1200-
little_endian_short_map[1] = 1;
1201-
1202-
machine_endian_long_map[0] = 0;
1203-
machine_endian_long_map[1] = 1;
1204-
machine_endian_long_map[2] = 2;
1205-
machine_endian_long_map[3] = 3;
1206-
big_endian_long_map[0] = 3;
1207-
big_endian_long_map[1] = 2;
1208-
big_endian_long_map[2] = 1;
1209-
big_endian_long_map[3] = 0;
1210-
little_endian_long_map[0] = 0;
1211-
little_endian_long_map[1] = 1;
1212-
little_endian_long_map[2] = 2;
1213-
little_endian_long_map[3] = 3;
1214-
1215-
#if SIZEOF_ZEND_LONG > 4
1216-
machine_endian_longlong_map[0] = 0;
1217-
machine_endian_longlong_map[1] = 1;
1218-
machine_endian_longlong_map[2] = 2;
1219-
machine_endian_longlong_map[3] = 3;
1220-
machine_endian_longlong_map[4] = 4;
1221-
machine_endian_longlong_map[5] = 5;
1222-
machine_endian_longlong_map[6] = 6;
1223-
machine_endian_longlong_map[7] = 7;
1224-
big_endian_longlong_map[0] = 7;
1225-
big_endian_longlong_map[1] = 6;
1226-
big_endian_longlong_map[2] = 5;
1227-
big_endian_longlong_map[3] = 4;
1228-
big_endian_longlong_map[4] = 3;
1229-
big_endian_longlong_map[5] = 2;
1230-
big_endian_longlong_map[6] = 1;
1231-
big_endian_longlong_map[7] = 0;
1232-
little_endian_longlong_map[0] = 0;
1233-
little_endian_longlong_map[1] = 1;
1234-
little_endian_longlong_map[2] = 2;
1235-
little_endian_longlong_map[3] = 3;
1236-
little_endian_longlong_map[4] = 4;
1237-
little_endian_longlong_map[5] = 5;
1238-
little_endian_longlong_map[6] = 6;
1239-
little_endian_longlong_map[7] = 7;
1240-
#endif
1241-
}
1242-
else {
1243-
zval val;
1244-
int size = sizeof(Z_LVAL(val));
1245-
Z_LVAL(val)=0; /*silence a warning*/
1246-
1247-
/* Where to get hi to lo bytes from */
1248-
byte_map[0] = size - 1;
1249-
1250-
for (i = 0; i < (int)sizeof(int); i++) {
1251-
int_map[i] = size - (sizeof(int) - i);
1252-
}
1253-
1254-
machine_endian_short_map[0] = size - 2;
1255-
machine_endian_short_map[1] = size - 1;
1256-
big_endian_short_map[0] = size - 2;
1257-
big_endian_short_map[1] = size - 1;
1258-
little_endian_short_map[0] = size - 1;
1259-
little_endian_short_map[1] = size - 2;
1260-
1261-
machine_endian_long_map[0] = size - 4;
1262-
machine_endian_long_map[1] = size - 3;
1263-
machine_endian_long_map[2] = size - 2;
1264-
machine_endian_long_map[3] = size - 1;
1265-
big_endian_long_map[0] = size - 4;
1266-
big_endian_long_map[1] = size - 3;
1267-
big_endian_long_map[2] = size - 2;
1268-
big_endian_long_map[3] = size - 1;
1269-
little_endian_long_map[0] = size - 1;
1270-
little_endian_long_map[1] = size - 2;
1271-
little_endian_long_map[2] = size - 3;
1272-
little_endian_long_map[3] = size - 4;
1273-
1274-
#if SIZEOF_ZEND_LONG > 4
1275-
machine_endian_longlong_map[0] = size - 8;
1276-
machine_endian_longlong_map[1] = size - 7;
1277-
machine_endian_longlong_map[2] = size - 6;
1278-
machine_endian_longlong_map[3] = size - 5;
1279-
machine_endian_longlong_map[4] = size - 4;
1280-
machine_endian_longlong_map[5] = size - 3;
1281-
machine_endian_longlong_map[6] = size - 2;
1282-
machine_endian_longlong_map[7] = size - 1;
1283-
big_endian_longlong_map[0] = size - 8;
1284-
big_endian_longlong_map[1] = size - 7;
1285-
big_endian_longlong_map[2] = size - 6;
1286-
big_endian_longlong_map[3] = size - 5;
1287-
big_endian_longlong_map[4] = size - 4;
1288-
big_endian_longlong_map[5] = size - 3;
1289-
big_endian_longlong_map[6] = size - 2;
1290-
big_endian_longlong_map[7] = size - 1;
1291-
little_endian_longlong_map[0] = size - 1;
1292-
little_endian_longlong_map[1] = size - 2;
1293-
little_endian_longlong_map[2] = size - 3;
1294-
little_endian_longlong_map[3] = size - 4;
1295-
little_endian_longlong_map[4] = size - 5;
1296-
little_endian_longlong_map[5] = size - 6;
1297-
little_endian_longlong_map[6] = size - 7;
1298-
little_endian_longlong_map[7] = size - 8;
1299-
#endif
1300-
}
1301-
1302-
return SUCCESS;
1303-
}
1304-
/* }}} */

ext/standard/pack.h

Lines changed: 0 additions & 22 deletions
This file was deleted.

ext/standard/php_standard.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929
#include "php_ext_syslog.h"
3030
#include "php_filestat.h"
3131
#include "php_browscap.h"
32-
#include "pack.h"
3332
#include "url.h"
3433
#include "pageinfo.h"
3534
#include "fsock.h"

0 commit comments

Comments
 (0)