@@ -758,6 +758,9 @@ typedef struct {
758
758
} wksps ;
759
759
} HUF_compress_tables_t ;
760
760
761
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096 /* nb of bytes histogrammed at the beginning of the input, and again at its end */
762
+ #define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10 /* Must be >= 2 : sampling only runs when srcSize >= SAMPLE_SIZE * SAMPLE_RATIO, which guarantees the begin and end samples both fit in the input without overlapping */
763
+
761
764
/* HUF_compress_internal() :
762
765
* `workSpace_align4` must be aligned on 4-byte boundaries,
763
766
* and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
@@ -768,7 +771,7 @@ HUF_compress_internal (void* dst, size_t dstSize,
768
771
HUF_nbStreams_e nbStreams ,
769
772
void * workSpace_align4 , size_t wkspSize ,
770
773
HUF_CElt * oldHufTable , HUF_repeat * repeat , int preferRepeat ,
771
- const int bmi2 )
774
+ const int bmi2 , unsigned suspectUncompressible )
772
775
{
773
776
HUF_compress_tables_t * const table = (HUF_compress_tables_t * )workSpace_align4 ;
774
777
BYTE * const ostart = (BYTE * )dst ;
@@ -795,6 +798,21 @@ HUF_compress_internal (void* dst, size_t dstSize,
795
798
nbStreams , oldHufTable , bmi2 );
796
799
}
797
800
801
+ /* If uncompressible data is suspected, do a smaller sampling first */
802
+ DEBUG_STATIC_ASSERT (SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2 );
803
+ if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO )) {
804
+ size_t largestTotal = 0 ;
805
+ { unsigned maxSymbolValueBegin = maxSymbolValue ;
806
+ CHECK_V_F (largestBegin , HIST_count_simple (table -> count , & maxSymbolValueBegin , (const BYTE * )src , SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE ) );
807
+ largestTotal += largestBegin ;
808
+ }
809
+ { unsigned maxSymbolValueEnd = maxSymbolValue ;
810
+ CHECK_V_F (largestEnd , HIST_count_simple (table -> count , & maxSymbolValueEnd , (const BYTE * )src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE , SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE ) );
811
+ largestTotal += largestEnd ;
812
+ }
813
+ if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE ) >> 7 )+ 4 ) return 0 ; /* heuristic : probably not compressible enough */
814
+ }
815
+
798
816
/* Scan input and build symbol stats */
799
817
{ CHECK_V_F (largest , HIST_count_wksp (table -> count , & maxSymbolValue , (const BYTE * )src , srcSize , workSpace_align4 , wkspSize ) );
800
818
if (largest == srcSize ) { * ostart = ((const BYTE * )src )[0 ]; return 1 ; } /* single symbol, rle */
@@ -860,19 +878,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
860
878
return HUF_compress_internal (dst , dstSize , src , srcSize ,
861
879
maxSymbolValue , huffLog , HUF_singleStream ,
862
880
workSpace , wkspSize ,
863
- NULL , NULL , 0 , 0 /*bmi2*/ );
881
+ NULL , NULL , 0 , 0 /*bmi2*/ , 0 );
864
882
}
865
883
866
884
/* HUF_compress1X_repeat():
 * single-stream entry point : forwards every argument to HUF_compress_internal(),
 * selecting HUF_singleStream as the number of streams.
 * `suspectUncompressible` is passed through unchanged ; when it is non-zero and the input is
 * large enough, HUF_compress_internal() samples the input first and may return 0 early. */
size_t HUF_compress1X_repeat (void * dst , size_t dstSize ,
867
885
const void * src , size_t srcSize ,
868
886
unsigned maxSymbolValue , unsigned huffLog ,
869
887
void * workSpace , size_t wkspSize ,
870
- HUF_CElt * hufTable , HUF_repeat * repeat , int preferRepeat , int bmi2 )
888
+ HUF_CElt * hufTable , HUF_repeat * repeat , int preferRepeat ,
889
+ int bmi2 , unsigned suspectUncompressible )
871
890
{
872
891
return HUF_compress_internal (dst , dstSize , src , srcSize ,
873
892
maxSymbolValue , huffLog , HUF_singleStream ,
874
893
workSpace , wkspSize , hufTable ,
875
- repeat , preferRepeat , bmi2 );
894
+ repeat , preferRepeat , bmi2 , suspectUncompressible ); /* new last argument : forwarded as received */
876
895
}
877
896
878
897
/* HUF_compress4X_repeat():
@@ -886,22 +905,23 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
886
905
return HUF_compress_internal (dst , dstSize , src , srcSize ,
887
906
maxSymbolValue , huffLog , HUF_fourStreams ,
888
907
workSpace , wkspSize ,
889
- NULL , NULL , 0 , 0 /*bmi2*/ );
908
+ NULL , NULL , 0 , 0 /*bmi2*/ , 0 );
890
909
}
891
910
892
911
/* HUF_compress4X_repeat():
893
912
* compress input using 4 streams.
913
+ * when `suspectUncompressible` is non-zero and the input is large enough, a quick sampling pass runs first and may skip compression (@return 0).
894
914
* re-use an existing huffman compression table */
895
915
size_t HUF_compress4X_repeat (void * dst , size_t dstSize ,
896
916
const void * src , size_t srcSize ,
897
917
unsigned maxSymbolValue , unsigned huffLog ,
898
918
void * workSpace , size_t wkspSize ,
899
- HUF_CElt * hufTable , HUF_repeat * repeat , int preferRepeat , int bmi2 )
919
+ HUF_CElt * hufTable , HUF_repeat * repeat , int preferRepeat , int bmi2 , unsigned suspectUncompressible )
900
920
{
901
921
return HUF_compress_internal (dst , dstSize , src , srcSize ,
902
922
maxSymbolValue , huffLog , HUF_fourStreams ,
903
923
workSpace , wkspSize ,
904
- hufTable , repeat , preferRepeat , bmi2 );
924
+ hufTable , repeat , preferRepeat , bmi2 , suspectUncompressible ); /* new last argument : forwarded as received */
905
925
}
906
926
907
927
#ifndef ZSTD_NO_UNUSED_FUNCTIONS
0 commit comments