1 /*
2     bench.c - Demo program to benchmark open-source compression algorithms
3     Copyright (C) Yann Collet 2012-2020
4 
5     GPL v2 License
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License along
18     with this program; if not, write to the Free Software Foundation, Inc.,
19     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 
21     You can contact the author at :
22     - LZ4 homepage : http://www.lz4.org
23     - LZ4 source repository : https://github.com/lz4/lz4
24 */
25 
26 
27 /*-************************************
28 *  Compiler options
29 **************************************/
30 #ifdef _MSC_VER    /* Visual Studio */
31 #  pragma warning(disable : 4127)    /* disable: C4127: conditional expression is constant */
32 #endif
33 
34 
35 /* *************************************
36 *  Includes
37 ***************************************/
38 #include "platform.h"    /* Compiler options */
39 #include "util.h"        /* UTIL_GetFileSize, UTIL_sleep */
40 #include <stdlib.h>      /* malloc, free */
41 #include <string.h>      /* memset */
42 #include <stdio.h>       /* fprintf, fopen, ftello */
43 #include <time.h>        /* clock_t, clock, CLOCKS_PER_SEC */
44 #include <assert.h>      /* assert */
45 
46 #include "datagen.h"     /* RDG_genBuffer */
47 #include "xxhash.h"
48 #include "bench.h"
49 
50 #define LZ4_STATIC_LINKING_ONLY
51 #include "lz4.h"
52 #define LZ4_HC_STATIC_LINKING_ONLY
53 #include "lz4hc.h"
54 
55 
56 /* *************************************
57 *  Compression parameters and functions
58 ***************************************/
59 
60 struct compressionParameters
61 {
62     int cLevel;
63     const char* dictBuf;
64     int dictSize;
65 
66     LZ4_stream_t* LZ4_stream;
67     LZ4_stream_t* LZ4_dictStream;
68     LZ4_streamHC_t* LZ4_streamHC;
69     LZ4_streamHC_t* LZ4_dictStreamHC;
70 
71     void (*initFunction)(
72         struct compressionParameters* pThis);
73     void (*resetFunction)(
74         const struct compressionParameters* pThis);
75     int (*blockFunction)(
76         const struct compressionParameters* pThis,
77         const char* src, char* dst, int srcSize, int dstSize);
78     void (*cleanupFunction)(
79         const struct compressionParameters* pThis);
80 };
81 
LZ4_compressInitNoStream(struct compressionParameters * pThis)82 static void LZ4_compressInitNoStream(
83     struct compressionParameters* pThis)
84 {
85     pThis->LZ4_stream = NULL;
86     pThis->LZ4_dictStream = NULL;
87     pThis->LZ4_streamHC = NULL;
88     pThis->LZ4_dictStreamHC = NULL;
89 }
90 
LZ4_compressInitStream(struct compressionParameters * pThis)91 static void LZ4_compressInitStream(
92     struct compressionParameters* pThis)
93 {
94     pThis->LZ4_stream = LZ4_createStream();
95     pThis->LZ4_dictStream = LZ4_createStream();
96     pThis->LZ4_streamHC = NULL;
97     pThis->LZ4_dictStreamHC = NULL;
98     LZ4_loadDict(pThis->LZ4_dictStream, pThis->dictBuf, pThis->dictSize);
99 }
100 
LZ4_compressInitStreamHC(struct compressionParameters * pThis)101 static void LZ4_compressInitStreamHC(
102     struct compressionParameters* pThis)
103 {
104     pThis->LZ4_stream = NULL;
105     pThis->LZ4_dictStream = NULL;
106     pThis->LZ4_streamHC = LZ4_createStreamHC();
107     pThis->LZ4_dictStreamHC = LZ4_createStreamHC();
108     LZ4_loadDictHC(pThis->LZ4_dictStreamHC, pThis->dictBuf, pThis->dictSize);
109 }
110 
/* Reset callback for the non-streaming modes : nothing to reset. */
static void LZ4_compressResetNoStream(const struct compressionParameters* pThis)
{
    (void)pThis;   /* parameter only present to match the callback signature */
}
116 
LZ4_compressResetStream(const struct compressionParameters * pThis)117 static void LZ4_compressResetStream(
118     const struct compressionParameters* pThis)
119 {
120     LZ4_resetStream_fast(pThis->LZ4_stream);
121     LZ4_attach_dictionary(pThis->LZ4_stream, pThis->LZ4_dictStream);
122 }
123 
LZ4_compressResetStreamHC(const struct compressionParameters * pThis)124 static void LZ4_compressResetStreamHC(
125     const struct compressionParameters* pThis)
126 {
127     LZ4_resetStreamHC_fast(pThis->LZ4_streamHC, pThis->cLevel);
128     LZ4_attach_HC_dictionary(pThis->LZ4_streamHC, pThis->LZ4_dictStreamHC);
129 }
130 
LZ4_compressBlockNoStream(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)131 static int LZ4_compressBlockNoStream(
132     const struct compressionParameters* pThis,
133     const char* src, char* dst,
134     int srcSize, int dstSize)
135 {
136     int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
137     return LZ4_compress_fast(src, dst, srcSize, dstSize, acceleration);
138 }
139 
LZ4_compressBlockNoStreamHC(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)140 static int LZ4_compressBlockNoStreamHC(
141     const struct compressionParameters* pThis,
142     const char* src, char* dst,
143     int srcSize, int dstSize)
144 {
145     return LZ4_compress_HC(src, dst, srcSize, dstSize, pThis->cLevel);
146 }
147 
LZ4_compressBlockStream(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)148 static int LZ4_compressBlockStream(
149     const struct compressionParameters* pThis,
150     const char* src, char* dst,
151     int srcSize, int dstSize)
152 {
153     int const acceleration = (pThis->cLevel < 0) ? -pThis->cLevel + 1 : 1;
154     return LZ4_compress_fast_continue(pThis->LZ4_stream, src, dst, srcSize, dstSize, acceleration);
155 }
156 
LZ4_compressBlockStreamHC(const struct compressionParameters * pThis,const char * src,char * dst,int srcSize,int dstSize)157 static int LZ4_compressBlockStreamHC(
158     const struct compressionParameters* pThis,
159     const char* src, char* dst,
160     int srcSize, int dstSize)
161 {
162     return LZ4_compress_HC_continue(pThis->LZ4_streamHC, src, dst, srcSize, dstSize);
163 }
164 
/* Cleanup callback for the non-streaming modes : nothing to release. */
static void LZ4_compressCleanupNoStream(const struct compressionParameters* pThis)
{
    (void)pThis;   /* parameter only present to match the callback signature */
}
170 
LZ4_compressCleanupStream(const struct compressionParameters * pThis)171 static void LZ4_compressCleanupStream(
172     const struct compressionParameters* pThis)
173 {
174     LZ4_freeStream(pThis->LZ4_stream);
175     LZ4_freeStream(pThis->LZ4_dictStream);
176 }
177 
LZ4_compressCleanupStreamHC(const struct compressionParameters * pThis)178 static void LZ4_compressCleanupStreamHC(
179     const struct compressionParameters* pThis)
180 {
181     LZ4_freeStreamHC(pThis->LZ4_streamHC);
182     LZ4_freeStreamHC(pThis->LZ4_dictStreamHC);
183 }
184 
LZ4_buildCompressionParameters(struct compressionParameters * pParams,int cLevel,const char * dictBuf,int dictSize)185 static void LZ4_buildCompressionParameters(
186     struct compressionParameters* pParams,
187     int cLevel, const char* dictBuf, int dictSize)
188 {
189     pParams->cLevel = cLevel;
190     pParams->dictBuf = dictBuf;
191     pParams->dictSize = dictSize;
192 
193     if (dictSize) {
194         if (cLevel < LZ4HC_CLEVEL_MIN) {
195             pParams->initFunction = LZ4_compressInitStream;
196             pParams->resetFunction = LZ4_compressResetStream;
197             pParams->blockFunction = LZ4_compressBlockStream;
198             pParams->cleanupFunction = LZ4_compressCleanupStream;
199         } else {
200             pParams->initFunction = LZ4_compressInitStreamHC;
201             pParams->resetFunction = LZ4_compressResetStreamHC;
202             pParams->blockFunction = LZ4_compressBlockStreamHC;
203             pParams->cleanupFunction = LZ4_compressCleanupStreamHC;
204         }
205     } else {
206         pParams->initFunction = LZ4_compressInitNoStream;
207         pParams->resetFunction = LZ4_compressResetNoStream;
208         pParams->cleanupFunction = LZ4_compressCleanupNoStream;
209 
210         if (cLevel < LZ4HC_CLEVEL_MIN) {
211             pParams->blockFunction = LZ4_compressBlockNoStream;
212         } else {
213             pParams->blockFunction = LZ4_compressBlockNoStreamHC;
214         }
215     }
216 }
217 
218 #define LZ4_isError(errcode) (errcode==0)
219 
220 
221 /* *************************************
222 *  Constants
223 ***************************************/
224 #ifndef LZ4_GIT_COMMIT_STRING
225 #  define LZ4_GIT_COMMIT_STRING ""
226 #else
227 #  define LZ4_GIT_COMMIT_STRING LZ4_EXPAND_AND_QUOTE(LZ4_GIT_COMMIT)
228 #endif
229 
230 #define NBSECONDS             3
231 #define TIMELOOP_MICROSEC     1*1000000ULL /* 1 second */
232 #define TIMELOOP_NANOSEC      1*1000000000ULL /* 1 second */
233 #define ACTIVEPERIOD_MICROSEC 70*1000000ULL /* 70 seconds */
234 #define COOLPERIOD_SEC        10
235 #define DECOMP_MULT           1 /* test decompression DECOMP_MULT times longer than compression */
236 
237 #define KB *(1 <<10)
238 #define MB *(1 <<20)
239 #define GB *(1U<<30)
240 
241 #define LZ4_MAX_DICT_SIZE (64 KB)
242 
243 static const size_t maxMemory = (sizeof(size_t)==4)  ?  (2 GB - 64 MB) : (size_t)(1ULL << ((sizeof(size_t)*8)-31));
244 
245 static U32 g_compressibilityDefault = 50;
246 
247 
248 /* *************************************
249 *  console display
250 ***************************************/
251 #define DISPLAY(...)         fprintf(stderr, __VA_ARGS__)
252 #define DISPLAYLEVEL(l, ...) if (g_displayLevel>=l) { DISPLAY(__VA_ARGS__); }
253 static U32 g_displayLevel = 2;   /* 0 : no display;   1: errors;   2 : + result + interaction + warnings;   3 : + progression;   4 : + information */
254 
255 #define DISPLAYUPDATE(l, ...) if (g_displayLevel>=l) { \
256             if ((clock() - g_time > refreshRate) || (g_displayLevel>=4)) \
257             { g_time = clock(); DISPLAY(__VA_ARGS__); \
258             if (g_displayLevel>=4) fflush(stdout); } }
259 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
260 static clock_t g_time = 0;
261 
262 
263 /* *************************************
264 *  Exceptions
265 ***************************************/
#ifndef DEBUG
#  define DEBUG 0
#endif
/* Prints only in DEBUG builds. Wrapped so that it behaves as one statement. */
#define DEBUGOUTPUT(...) do { if (DEBUG) DISPLAY(__VA_ARGS__); } while (0)
/* Reports an error (at notification level >= 1) and terminates the program
 * with `error` as exit code. Does not return.
 * Wrapped in do { } while (0) so that `if (cond) EXM_THROW(...); else ...`
 * parses as intended; callers must end the invocation with `;`. */
#define EXM_THROW(error, ...)                                             \
do {                                                                      \
    DEBUGOUTPUT("Error defined at %s, line %i : \n", __FILE__, __LINE__); \
    DISPLAYLEVEL(1, "Error %i : ", error);                                \
    DISPLAYLEVEL(1, __VA_ARGS__);                                         \
    DISPLAYLEVEL(1, "\n");                                                \
    exit(error);                                                          \
} while (0)
278 
279 
280 /* *************************************
281 *  Benchmark Parameters
282 ***************************************/
/* cumulated measurement time to exceed, per direction, before a benchmark completes */
static U32 g_nbSeconds = NBSECONDS;
/* block size used to cut inputs; values below 32 mean "whole input as a single block" */
static size_t g_blockSize = 0;
/* free-form value echoed in the level-1 result line when non-zero */
int g_additionalParam = 0;
/* when non-zero, BMK_benchFiles() benchmarks each file on its own */
int g_benchSeparately = 0;
287 
BMK_setNotificationLevel(unsigned level)288 void BMK_setNotificationLevel(unsigned level) { g_displayLevel=level; }
289 
BMK_setAdditionalParam(int additionalParam)290 void BMK_setAdditionalParam(int additionalParam) { g_additionalParam=additionalParam; }
291 
BMK_setNbSeconds(unsigned nbSeconds)292 void BMK_setNbSeconds(unsigned nbSeconds)
293 {
294     g_nbSeconds = nbSeconds;
295     DISPLAYLEVEL(3, "- test >= %u seconds per compression / decompression -\n", g_nbSeconds);
296 }
297 
BMK_setBlockSize(size_t blockSize)298 void BMK_setBlockSize(size_t blockSize) { g_blockSize = blockSize; }
299 
BMK_setBenchSeparately(int separate)300 void BMK_setBenchSeparately(int separate) { g_benchSeparately = (separate!=0); }
301 
302 
303 /* ********************************************************
304 *  Bench functions
305 **********************************************************/
/* Description of one block handled by BMK_benchMem() :
 * where it is read from, where it is compressed to,
 * and where it is regenerated. */
typedef struct {
    const char* srcPtr;   /* start of the block within the source buffer */
    size_t srcSize;       /* size of the block */
    char*  cPtr;          /* where the compressed block is written */
    size_t cRoom;         /* space reserved at cPtr (LZ4_compressBound of srcSize) */
    size_t cSize;         /* compressed size produced by the last compression pass */
    char*  resPtr;        /* where the block is decompressed to */
    size_t resSize;       /* size regenerated by the last decompression pass */
} blockParam_t;

/* NOTE : both macros evaluate their arguments twice */
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
318 
/* BMK_benchMem() :
 * Benchmarks compression then decompression of `srcBuffer` at level `cLevel`,
 * and displays size, ratio and best observed speeds.
 *
 * srcBuffer, srcSize : content to benchmark (files stored back to back).
 * displayName        : label for the result line; only its last 17 characters are shown.
 * fileSizes, nbFiles : size of each file stored in srcBuffer; every file is cut
 *                      into blocks of at most `blockSize` bytes.
 * dictBuf, dictSize  : optional dictionary, used for both compression and decompression.
 *
 * Compression and decompression rounds alternate until each direction has
 * accumulated more than its time budget. After every round, the regenerated
 * data is compared to the source through an XXH64 checksum; a mismatch
 * is reported and stops the benchmark.
 *
 * @return : always 0. Exits through EXM_THROW on allocation failure (31)
 *           or when a block fails to compress (1). */
static int BMK_benchMem(const void* srcBuffer, size_t srcSize,
                        const char* displayName, int cLevel,
                        const size_t* fileSizes, U32 nbFiles,
                        const char* dictBuf, int dictSize)
{
    size_t const blockSize = (g_blockSize>=32 ? g_blockSize : srcSize) + (!srcSize) /* avoid div by 0 */ ;
    /* upper bound : each file may add one partial block */
    U32 const maxNbBlocks = (U32) ((srcSize + (blockSize-1)) / blockSize) + nbFiles;
    blockParam_t* const blockTable = (blockParam_t*) malloc(maxNbBlocks * sizeof(blockParam_t));
    size_t const maxCompressedSize = LZ4_compressBound((int)srcSize) + (maxNbBlocks * 1024);   /* add some room for safety */
    void* const compressedBuffer = malloc(maxCompressedSize);
    void* const resultBuffer = malloc(srcSize);
    U32 nbBlocks;
    struct compressionParameters compP;

    /* checks */
    if (!compressedBuffer || !resultBuffer || !blockTable)
        EXM_THROW(31, "allocation error : not enough memory");

    if (strlen(displayName)>17) displayName += strlen(displayName)-17;   /* can only display 17 characters */

    /* init */
    LZ4_buildCompressionParameters(&compP, cLevel, dictBuf, dictSize);
    compP.initFunction(&compP);

    /* Init blockTable data */
    /* blocks never span two files : each file is cut on its own */
    {   const char* srcPtr = (const char*)srcBuffer;
        char* cPtr = (char*)compressedBuffer;
        char* resPtr = (char*)resultBuffer;
        U32 fileNb;
        for (nbBlocks=0, fileNb=0; fileNb<nbFiles; fileNb++) {
            size_t remaining = fileSizes[fileNb];
            U32 const nbBlocksforThisFile = (U32)((remaining + (blockSize-1)) / blockSize);
            U32 const blockEnd = nbBlocks + nbBlocksforThisFile;
            for ( ; nbBlocks<blockEnd; nbBlocks++) {
                size_t const thisBlockSize = MIN(remaining, blockSize);
                blockTable[nbBlocks].srcPtr = srcPtr;
                blockTable[nbBlocks].cPtr = cPtr;
                blockTable[nbBlocks].resPtr = resPtr;
                blockTable[nbBlocks].srcSize = thisBlockSize;
                blockTable[nbBlocks].cRoom = (size_t)LZ4_compressBound((int)thisBlockSize);
                srcPtr += thisBlockSize;
                cPtr += blockTable[nbBlocks].cRoom;
                resPtr += thisBlockSize;
                remaining -= thisBlockSize;
    }   }   }

    /* warming up memory */
    RDG_genBuffer(compressedBuffer, maxCompressedSize, 0.10, 0.50, 1);

    /* Bench */
    {   U64 fastestC = (U64)(-1LL), fastestD = (U64)(-1LL);   /* best time per pass, in ns; starts at max value */
        U64 const crcOrig = XXH64(srcBuffer, srcSize, 0);
        UTIL_time_t coolTime;
        U64 const maxTime = (g_nbSeconds * TIMELOOP_NANOSEC) + 100;   /* time budget, in ns */
        U32 nbCompressionLoops = (U32)((5 MB) / (srcSize+1)) + 1;  /* conservative initial compression speed estimate */
        U32 nbDecodeLoops = (U32)((200 MB) / (srcSize+1)) + 1;  /* conservative initial decode speed estimate */
        U64 totalCTime=0, totalDTime=0;
        U32 cCompleted=0, dCompleted=0;
#       define NB_MARKS 4
        const char* const marks[NB_MARKS] = { " |", " /", " =",  "\\" };   /* progress indicator, rotated at each display */
        U32 markNb = 0;
        size_t cSize = 0;
        double ratio = 0.;

        coolTime = UTIL_getTime();
        DISPLAYLEVEL(2, "\r%79s\r", "");
        while (!cCompleted || !dCompleted) {
            /* overheat protection */
            if (UTIL_clockSpanMicro(coolTime) > ACTIVEPERIOD_MICROSEC) {
                DISPLAYLEVEL(2, "\rcooling down ...    \r");
                UTIL_sleep(COOLPERIOD_SEC);
                coolTime = UTIL_getTime();
            }

            /* Compression */
            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->\r", marks[markNb], displayName, (U32)srcSize);
            if (!cCompleted) memset(compressedBuffer, 0xE5, maxCompressedSize);  /* warm up and erase result buffer */

            UTIL_sleepMilli(1);  /* give processor time to other processes */
            UTIL_waitForNextTick();

            if (!cCompleted) {   /* still some time to do compression tests */
                UTIL_time_t const clockStart = UTIL_getTime();
                U32 nbLoops;
                for (nbLoops=0; nbLoops < nbCompressionLoops; nbLoops++) {
                    U32 blockNb;
                    compP.resetFunction(&compP);
                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                        size_t const rSize = (size_t)compP.blockFunction(
                            &compP,
                            blockTable[blockNb].srcPtr, blockTable[blockNb].cPtr,
                            (int)blockTable[blockNb].srcSize, (int)blockTable[blockNb].cRoom);
                        if (LZ4_isError(rSize)) EXM_THROW(1, "LZ4 compression failed");
                        blockTable[blockNb].cSize = rSize;
                }   }
                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
                    if (clockSpan > 0) {
                        /* keep the best time per pass, then resize the next round from it */
                        if (clockSpan < fastestC * nbCompressionLoops)
                            fastestC = clockSpan / nbCompressionLoops;
                        assert(fastestC > 0);
                        nbCompressionLoops = (U32)(TIMELOOP_NANOSEC / fastestC) + 1;  /* aim for ~1sec */
                    } else {
                        /* span was measured as 0 : run many more passes next round */
                        assert(nbCompressionLoops < 40000000);   /* avoid overflow */
                        nbCompressionLoops *= 100;
                    }
                    totalCTime += clockSpan;
                    cCompleted = totalCTime>maxTime;
            }   }

            cSize = 0;
            { U32 blockNb; for (blockNb=0; blockNb<nbBlocks; blockNb++) cSize += blockTable[blockNb].cSize; }
            cSize += !cSize;  /* avoid div by 0 */
            ratio = (double)srcSize / (double)cSize;
            markNb = (markNb+1) % NB_MARKS;
            /* speed : bytes per ns, times 1000, gives MB/s */
            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s\r",
                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
                    ((double)srcSize / fastestC) * 1000 );

            (void)fastestD; (void)crcOrig;   /*  unused when decompression disabled */
#if 1
            /* Decompression */
            if (!dCompleted) memset(resultBuffer, 0xD6, srcSize);  /* warm result buffer */

            UTIL_sleepMilli(5); /* give processor time to other processes */
            UTIL_waitForNextTick();

            if (!dCompleted) {
                UTIL_time_t const clockStart = UTIL_getTime();
                U32 nbLoops;
                for (nbLoops=0; nbLoops < nbDecodeLoops; nbLoops++) {
                    U32 blockNb;
                    for (blockNb=0; blockNb<nbBlocks; blockNb++) {
                        int const regenSize = LZ4_decompress_safe_usingDict(
                            blockTable[blockNb].cPtr, blockTable[blockNb].resPtr,
                            (int)blockTable[blockNb].cSize, (int)blockTable[blockNb].srcSize,
                            dictBuf, dictSize);
                        if (regenSize < 0) {
                            DISPLAY("LZ4_decompress_safe_usingDict() failed on block %u \n", blockNb);
                            break;   /* NOTE : only leaves the block loop; remaining decode passes still run */
                        }
                        blockTable[blockNb].resSize = (size_t)regenSize;
                }   }
                {   U64 const clockSpan = UTIL_clockSpanNano(clockStart);
                    if (clockSpan > 0) {
                        /* keep the best time per pass, then resize the next round from it */
                        if (clockSpan < fastestD * nbDecodeLoops)
                            fastestD = clockSpan / nbDecodeLoops;
                        assert(fastestD > 0);
                        nbDecodeLoops = (U32)(TIMELOOP_NANOSEC / fastestD) + 1;  /* aim for ~1sec */
                    } else {
                        /* span was measured as 0 : run many more passes next round */
                        assert(nbDecodeLoops < 40000000);   /* avoid overflow */
                        nbDecodeLoops *= 100;
                    }
                    totalDTime += clockSpan;
                    dCompleted = totalDTime > (DECOMP_MULT*maxTime);
            }   }

            markNb = (markNb+1) % NB_MARKS;
            DISPLAYLEVEL(2, "%2s-%-17.17s :%10u ->%10u (%5.3f),%6.1f MB/s ,%6.1f MB/s\r",
                    marks[markNb], displayName, (U32)srcSize, (U32)cSize, ratio,
                    ((double)srcSize / fastestC) * 1000,
                    ((double)srcSize / fastestD) * 1000);

            /* CRC Checking */
            {   U64 const crcCheck = XXH64(resultBuffer, srcSize, 0);
                if (crcOrig!=crcCheck) {
                    size_t u;
                    DISPLAY("\n!!! WARNING !!! %17s : Invalid Checksum : %x != %x   \n", displayName, (unsigned)crcOrig, (unsigned)crcCheck);
                    /* locate and report the first differing byte */
                    for (u=0; u<srcSize; u++) {
                        if (((const BYTE*)srcBuffer)[u] != ((const BYTE*)resultBuffer)[u]) {
                            U32 segNb, bNb, pos;
                            size_t bacc = 0;   /* cumulated size of the blocks located before position u */
                            DISPLAY("Decoding error at pos %u ", (U32)u);
                            for (segNb = 0; segNb < nbBlocks; segNb++) {
                                if (bacc + blockTable[segNb].srcSize > u) break;
                                bacc += blockTable[segNb].srcSize;
                            }
                            pos = (U32)(u - bacc);
                            bNb = pos / (128 KB);
                            DISPLAY("(block %u, sub %u, pos %u) \n", segNb, bNb, pos);
                            break;
                        }
                        if (u==srcSize-1) {  /* should never happen */
                            DISPLAY("no difference detected\n");
                    }   }
                    break;   /* checksum mismatch : leave the benchmark loop */
            }   }   /* CRC Checking */
#endif
        }   /* while (!cCompleted || !dCompleted) */

        /* one-line summary, only at notification level 1 */
        if (g_displayLevel == 1) {
            double const cSpeed = ((double)srcSize / fastestC) * 1000;
            double const dSpeed = ((double)srcSize / fastestD) * 1000;
            if (g_additionalParam)
                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s (param=%d)\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName, g_additionalParam);
            else
                DISPLAY("-%-3i%11i (%5.3f) %6.2f MB/s %6.1f MB/s  %s\n", cLevel, (int)cSize, ratio, cSpeed, dSpeed, displayName);
        }
        DISPLAYLEVEL(2, "%2i#\n", cLevel);
    }   /* Bench */

    /* clean up */
    compP.cleanupFunction(&compP);
    free(blockTable);
    free(compressedBuffer);
    free(resultBuffer);
    return 0;
}
526 
527 
/* BMK_findMaxMem() :
 * Probes how much memory can actually be allocated, starting from
 * `requiredMem` (moved up to the next 64 MB boundary, plus a margin,
 * capped at `maxMemory`) and lowering the request until malloc() succeeds.
 * The probe allocation is released before returning.
 * @return : an amount below the one that could be allocated, in order to
 *           keep some space available; 0 when nothing could be allocated. */
static size_t BMK_findMaxMem(U64 requiredMem)
{
    size_t const step = 64 MB;
    BYTE* testmem = NULL;

    requiredMem = (((requiredMem >> 26) + 1) << 26);
    requiredMem += 2*step;
    if (requiredMem > maxMemory) requiredMem = maxMemory;

    while (!testmem) {
        if (requiredMem > step) requiredMem -= step;
        else requiredMem >>= 1;
        /* malloc(0) is allowed to return NULL : without this check,
         * the loop would never end once the request has shrunk to 0 */
        if (requiredMem == 0) return 0;
        testmem = (BYTE*) malloc ((size_t)requiredMem);
    }
    free (testmem);

    /* keep some space available */
    if (requiredMem > step) requiredMem -= step;
    else requiredMem >>= 1;

    return (size_t)requiredMem;
}
550 
551 
BMK_benchCLevel(void * srcBuffer,size_t benchedSize,const char * displayName,int cLevel,int cLevelLast,const size_t * fileSizes,unsigned nbFiles,const char * dictBuf,int dictSize)552 static void BMK_benchCLevel(void* srcBuffer, size_t benchedSize,
553                             const char* displayName, int cLevel, int cLevelLast,
554                             const size_t* fileSizes, unsigned nbFiles,
555                             const char* dictBuf, int dictSize)
556 {
557     int l;
558 
559     const char* pch = strrchr(displayName, '\\'); /* Windows */
560     if (!pch) pch = strrchr(displayName, '/'); /* Linux */
561     if (pch) displayName = pch+1;
562 
563     SET_REALTIME_PRIORITY;
564 
565     if (g_displayLevel == 1 && !g_additionalParam)
566         DISPLAY("bench %s %s: input %u bytes, %u seconds, %u KB blocks\n", LZ4_VERSION_STRING, LZ4_GIT_COMMIT_STRING, (U32)benchedSize, g_nbSeconds, (U32)(g_blockSize>>10));
567 
568     if (cLevelLast < cLevel) cLevelLast = cLevel;
569 
570     for (l=cLevel; l <= cLevelLast; l++) {
571         BMK_benchMem(srcBuffer, benchedSize,
572                      displayName, l,
573                      fileSizes, nbFiles,
574                      dictBuf, dictSize);
575     }
576 }
577 
578 
579 /*! BMK_loadFiles() :
580     Loads `buffer` with content of files listed within `fileNamesTable`.
581     At most, fills `buffer` entirely */
BMK_loadFiles(void * buffer,size_t bufferSize,size_t * fileSizes,const char ** fileNamesTable,unsigned nbFiles)582 static void BMK_loadFiles(void* buffer, size_t bufferSize,
583                           size_t* fileSizes,
584                           const char** fileNamesTable, unsigned nbFiles)
585 {
586     size_t pos = 0, totalSize = 0;
587     unsigned n;
588     for (n=0; n<nbFiles; n++) {
589         FILE* f;
590         U64 fileSize = UTIL_getFileSize(fileNamesTable[n]);
591         if (UTIL_isDirectory(fileNamesTable[n])) {
592             DISPLAYLEVEL(2, "Ignoring %s directory...       \n", fileNamesTable[n]);
593             fileSizes[n] = 0;
594             continue;
595         }
596         f = fopen(fileNamesTable[n], "rb");
597         if (f==NULL) EXM_THROW(10, "impossible to open file %s", fileNamesTable[n]);
598         DISPLAYUPDATE(2, "Loading %s...       \r", fileNamesTable[n]);
599         if (fileSize > bufferSize-pos) { /* buffer too small - stop after this file */
600             fileSize = bufferSize-pos;
601             nbFiles=n;
602         }
603         { size_t const readSize = fread(((char*)buffer)+pos, 1, (size_t)fileSize, f);
604           if (readSize != (size_t)fileSize) EXM_THROW(11, "could not read %s", fileNamesTable[n]);
605           pos += readSize; }
606         fileSizes[n] = (size_t)fileSize;
607         totalSize += (size_t)fileSize;
608         fclose(f);
609     }
610 
611     if (totalSize == 0) EXM_THROW(12, "no data to bench");
612 }
613 
BMK_benchFileTable(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictBuf,int dictSize)614 static void BMK_benchFileTable(const char** fileNamesTable, unsigned nbFiles,
615                                int cLevel, int cLevelLast,
616                                const char* dictBuf, int dictSize)
617 {
618     void* srcBuffer;
619     size_t benchedSize;
620     size_t* fileSizes = (size_t*)malloc(nbFiles * sizeof(size_t));
621     U64 const totalSizeToLoad = UTIL_getTotalFileSize(fileNamesTable, nbFiles);
622     char mfName[20] = {0};
623 
624     if (!fileSizes) EXM_THROW(12, "not enough memory for fileSizes");
625 
626     /* Memory allocation & restrictions */
627     benchedSize = BMK_findMaxMem(totalSizeToLoad * 3) / 3;
628     if (benchedSize==0) EXM_THROW(12, "not enough memory");
629     if ((U64)benchedSize > totalSizeToLoad) benchedSize = (size_t)totalSizeToLoad;
630     if (benchedSize > LZ4_MAX_INPUT_SIZE) {
631         benchedSize = LZ4_MAX_INPUT_SIZE;
632         DISPLAY("File(s) bigger than LZ4's max input size; testing %u MB only...\n", (U32)(benchedSize >> 20));
633     } else {
634         if (benchedSize < totalSizeToLoad)
635             DISPLAY("Not enough memory; testing %u MB only...\n", (U32)(benchedSize >> 20));
636     }
637     srcBuffer = malloc(benchedSize + !benchedSize);   /* avoid alloc of zero */
638     if (!srcBuffer) EXM_THROW(12, "not enough memory");
639 
640     /* Load input buffer */
641     BMK_loadFiles(srcBuffer, benchedSize, fileSizes, fileNamesTable, nbFiles);
642 
643     /* Bench */
644     snprintf (mfName, sizeof(mfName), " %u files", nbFiles);
645     {   const char* displayName = (nbFiles > 1) ? mfName : fileNamesTable[0];
646         BMK_benchCLevel(srcBuffer, benchedSize,
647                         displayName, cLevel, cLevelLast,
648                         fileSizes, nbFiles,
649                         dictBuf, dictSize);
650     }
651 
652     /* clean up */
653     free(srcBuffer);
654     free(fileSizes);
655 }
656 
657 
/* BMK_syntheticTest() :
 * Benchmarks a generated sample of 10000000 bytes, produced by
 * RDG_genBuffer() with the requested `compressibility`,
 * for every level from `cLevel` to `cLevelLast`.
 * Exits through EXM_THROW (21) when the sample cannot be allocated. */
static void BMK_syntheticTest(int cLevel, int cLevelLast, double compressibility,
                              const char* dictBuf, int dictSize)
{
    size_t benchedSize = 10000000;   /* also serves as the one-entry file size table */
    char name[20] = {0};
    void* const srcBuffer = malloc(benchedSize);

    if (srcBuffer == NULL) {
        EXM_THROW(21, "not enough memory");
    }

    /* generate the sample */
    RDG_genBuffer(srcBuffer, benchedSize, compressibility, 0.0, 0);

    /* label the result with the compressibility, as a percentage */
    snprintf (name, sizeof(name), "Synthetic %2u%%", (unsigned)(compressibility*100));

    BMK_benchCLevel(srcBuffer, benchedSize,
                    name, cLevel, cLevelLast,
                    &benchedSize, 1,
                    dictBuf, dictSize);

    free(srcBuffer);
}
678 
679 
BMK_benchFilesSeparately(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictBuf,int dictSize)680 int BMK_benchFilesSeparately(const char** fileNamesTable, unsigned nbFiles,
681                    int cLevel, int cLevelLast,
682                    const char* dictBuf, int dictSize)
683 {
684     unsigned fileNb;
685     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
686     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
687     if (cLevelLast < cLevel) cLevelLast = cLevel;
688     if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
689 
690     for (fileNb=0; fileNb<nbFiles; fileNb++)
691         BMK_benchFileTable(fileNamesTable+fileNb, 1, cLevel, cLevelLast, dictBuf, dictSize);
692 
693     return 0;
694 }
695 
696 
BMK_benchFiles(const char ** fileNamesTable,unsigned nbFiles,int cLevel,int cLevelLast,const char * dictFileName)697 int BMK_benchFiles(const char** fileNamesTable, unsigned nbFiles,
698                    int cLevel, int cLevelLast,
699                    const char* dictFileName)
700 {
701     double const compressibility = (double)g_compressibilityDefault / 100;
702     char* dictBuf = NULL;
703     int dictSize = 0;
704 
705     if (cLevel > LZ4HC_CLEVEL_MAX) cLevel = LZ4HC_CLEVEL_MAX;
706     if (cLevelLast > LZ4HC_CLEVEL_MAX) cLevelLast = LZ4HC_CLEVEL_MAX;
707     if (cLevelLast < cLevel) cLevelLast = cLevel;
708     if (cLevelLast > cLevel) DISPLAYLEVEL(2, "Benchmarking levels from %d to %d\n", cLevel, cLevelLast);
709 
710     if (dictFileName) {
711         FILE* dictFile = NULL;
712         U64 dictFileSize = UTIL_getFileSize(dictFileName);
713         if (!dictFileSize) EXM_THROW(25, "Dictionary error : could not stat dictionary file");
714 
715         dictFile = fopen(dictFileName, "rb");
716         if (!dictFile) EXM_THROW(25, "Dictionary error : could not open dictionary file");
717 
718         if (dictFileSize > LZ4_MAX_DICT_SIZE) {
719             dictSize = LZ4_MAX_DICT_SIZE;
720             if (UTIL_fseek(dictFile, dictFileSize - dictSize, SEEK_SET))
721                 EXM_THROW(25, "Dictionary error : could not seek dictionary file");
722         } else {
723             dictSize = (int)dictFileSize;
724         }
725 
726         dictBuf = (char *)malloc(dictSize);
727         if (!dictBuf) EXM_THROW(25, "Allocation error : not enough memory");
728 
729         if (fread(dictBuf, 1, dictSize, dictFile) != (size_t)dictSize)
730             EXM_THROW(25, "Dictionary error : could not read dictionary file");
731 
732         fclose(dictFile);
733     }
734 
735     if (nbFiles == 0)
736         BMK_syntheticTest(cLevel, cLevelLast, compressibility, dictBuf, dictSize);
737     else {
738         if (g_benchSeparately)
739             BMK_benchFilesSeparately(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize);
740         else
741             BMK_benchFileTable(fileNamesTable, nbFiles, cLevel, cLevelLast, dictBuf, dictSize);
742     }
743 
744     free(dictBuf);
745     return 0;
746 }
747