Skip to content
Primal Pappachan edited this page Jun 24, 2020 · 1 revision

Set histogram sampling rate by setting the memory limit (100 MB in example)

 Set histogram_generation_max_mem_size = 100000000;

Generate histogram for a column

analyze table `MALL_OBSERVATION` update histogram on shop_name with 50 buckets;

drop histogram

analyze table `MALL_OBSERVATION` drop histogram on shop_name;

To check all the histograms generated

SELECT TABLE_NAME, COLUMN_NAME, HISTOGRAM->>'$."number-of-buckets-specified"' AS num_buckets_specified, HISTOGRAM->>'$."histogram-type"' AS histogram_type, JSON_LENGTH(HISTOGRAM, '$.buckets') AS num_buckets_created FROM INFORMATION_SCHEMA.COLUMN_STATISTICS WHERE SCHEMA_NAME = "tinytest";    

Convert equiheight histogram buckets for timestamp to SQL rows

SELECT lvalue, uvalue, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq, numItems FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(lvalue VARCHAR(60) PATH '$[0]', uvalue VARCHAR(60) PATH '$[1]', c double PATH '$[2]', numItems integer PATH '$[3]')) hist  where column_name = 'timeStamp';

Convert equiheight histogram buckets for String to SQL rows

SELECT FROM_BASE64(SUBSTRING_INDEX(v1, ':', -1)) lvalue, FROM_BASE64(SUBSTRING_INDEX(v2, ':', -1)) uvalue, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq, numItems FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(v1 VARCHAR(60) PATH '$[0]', v2 VARCHAR(60) PATH '$[1]', c double PATH '$[2]', numItems integer PATH '$[3]')) hist  where column_name = 'user_id';

Convert singleton histogram buckets for String to SQL rows

SELECT FROM_BASE64(SUBSTRING_INDEX(v, ':', -1)) value, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(v VARCHAR(60) PATH '$[0]', c double PATH '$[1]')) hist  where column_name = 'user_interest';                                                                        

Note

If with the above queries, value turn out to be null then remove BASE64 conversion.

SELECT v as value, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(v VARCHAR(60) PATH '$[0]', c double PATH '$[1]')) hist  where column_name = 'user_interest';                                                                        

Convert equiheight histogram buckets for Integer to SQL rows

SELECT lvalue, uvalue, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq, numItems FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(lvalue int PATH '$[0]', uvalue int PATH '$[1]', c double PATH '$[2]', numItems integer PATH '$[3]')) hist  where column_name = 'user_id';

Convert singleton histogram buckets for Date to SQL rows

SELECT value, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq, numItems FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(value date PATH '$[0]', c double PATH '$[1]', numItems integer PATH '$[2]')) hist  where column_name = 'start_date';

Convert equiheight histograms for Time to SQL rows

 SELECT lvalue, uvalue, concat(round(c*100,1),'%') cumulfreq, CONCAT(round((c - LAG(c, 1, 0) over()) * 100,1), '%') freq, numItems FROM information_schema.column_statistics, JSON_TABLE(histogram->'$.buckets',       '$[*]' COLUMNS(lvalue time PATH '$[0]', uvalue time PATH '$[1]', c double PATH '$[2]', numItems integer PATH '$[3]')) hist  where column_name = 'start_time';

References

  1. https://mysqlserverteam.com/histogram-statistics-in-mysql/
  2. http://oysteing.blogspot.be/2018/02/fosdem-histogram-support-in-mysql-80.html
  3. http://lefred.be/content/mysql-8-0-histograms/
  4. (Database independent on Query Optimization) https://www2.cs.duke.edu/courses/spring03/cps216/lectures/19-qo.pdf
Clone this wiki locally