From c2e5c86c3a8d93f6d0a98803fdb517e0b88a3a1f Mon Sep 17 00:00:00 2001 From: Henning Merklinger Date: Tue, 23 Aug 2022 10:28:43 +0200 Subject: [PATCH] bump version 0.4.0 --- .gitignore | 3 +- README.md | 34 +- car_example.py | 14 +- insights.svg | 7352 ++++++++++++++++++++++---------------------- requirements.txt | 1 + setup.py | 2 +- tki/__init__.py | 1 + tki/__version__.py | 2 +- tki/aggregators.py | 2 +- tki/app.py | 170 + tki/dimensions.py | 20 +- tki/insights.py | 121 +- tki/tki.py | 4 +- 13 files changed, 4071 insertions(+), 3655 deletions(-) create mode 100644 tki/app.py diff --git a/.gitignore b/.gitignore index 1774af1..04821e3 100644 --- a/.gitignore +++ b/.gitignore @@ -128,4 +128,5 @@ dmypy.json # Pyre type checker .pyre/ -.DS_Store \ No newline at end of file +.DS_Store +insights.pkl \ No newline at end of file diff --git a/README.md b/README.md index 2b7de4b..1f8af9c 100644 --- a/README.md +++ b/README.md @@ -27,8 +27,7 @@ from tki.insights import OutstandingFirstInsight, OutstandingLastInsight, \ from tki.extractors import RankExtractor, DeltaPrevExtractor, \ DeltaMeanExtractor, ProportionExtractor from tki.aggregators import SumAggregator -from tki.dimensions import TemporalDimension, OrdinalDimension, \ - NominalDimension +from tki.dimensions import CardinalDimension, TemporalDimension, NominalDimension data = [ ['H', 2010, 40], ['T', 2010, 38], ['F', 2010, 13], ['B', 2010, 20], @@ -56,8 +55,10 @@ insights = { } tki = TKI( pd.DataFrame(data, columns=['Brand', 'year', 'Cars Sold']), - dimensions=[NominalDimension('Brand'), TemporalDimension('year')], - measurements=[OrdinalDimension('Cars Sold')], + dimensions=[ + NominalDimension('Brand'), + TemporalDimension('year', date_format='%Y', freq='1Y')], + measurements=[CardinalDimension('Cars Sold')], extractors=extractors, aggregators=aggregators, insights=insights, @@ -65,8 +66,7 @@ tki = TKI( result_size=21) tki.run() -_, axes = plt.subplots(7, 3, figsize=(25, 40), dpi=80) 
-plt.subplots_adjust(hspace=0.3) +fig, axes = plt.subplots(7, 3, figsize=(25, 40), dpi=80) for idx, i in enumerate(tki.heap.insights): plt.axes(axes[int(idx/3)][idx % 3]) i.plot() @@ -74,11 +74,31 @@ for idx, i in enumerate(tki.heap.insights): f"{idx + 1}) {type(i.insight).__name__} " f"score: {i.impact:.2f} * {i.significance:.2f} = {i.score:.2f}\n" f"{(i.sibling_group, i.composite_extractor)}") + x_index = i.data.index.get_level_values(i.data.index.names[-1]) plt.xticks(rotation=0) + if isinstance(x_index, pd.DatetimeIndex): + plt.xticks( + range(i.data.index.size), + x_index.to_series().dt.year) +fig.tight_layout() plt.savefig('insights.svg') - +tki.save('insights.pkl') ```` ### Result ![Insights](./insights.svg) + + +## Web Application + +The web application will provide a convenient interface for the tki package in the future. +At the moment it is possible to visualize saved insights in the browser. +Start the server with + +```` +python -m tki.app +```` + +The project will be accessible via `http://127.0.0.1:8050/` in your web browser. +To display your previously generated results, open the `Results` tab and upload your `insights.pkl` file. 
\ No newline at end of file diff --git a/car_example.py b/car_example.py index 243bef4..fb92d45 100644 --- a/car_example.py +++ b/car_example.py @@ -35,7 +35,9 @@ } tki = TKI( pd.DataFrame(data, columns=['Brand', 'year', 'Cars Sold']), - dimensions=[NominalDimension('Brand'), TemporalDimension('year')], + dimensions=[ + NominalDimension('Brand'), + TemporalDimension('year', date_format='%Y', freq='1Y')], measurements=[CardinalDimension('Cars Sold')], extractors=extractors, aggregators=aggregators, @@ -44,8 +46,7 @@ result_size=21) tki.run() -_, axes = plt.subplots(7, 3, figsize=(25, 40), dpi=80) -plt.subplots_adjust(hspace=0.3) +fig, axes = plt.subplots(7, 3, figsize=(25, 40), dpi=80) for idx, i in enumerate(tki.heap.insights): plt.axes(axes[int(idx/3)][idx % 3]) i.plot() @@ -53,5 +54,12 @@ f"{idx + 1}) {type(i.insight).__name__} " f"score: {i.impact:.2f} * {i.significance:.2f} = {i.score:.2f}\n" f"{(i.sibling_group, i.composite_extractor)}") + x_index = i.data.index.get_level_values(i.data.index.names[-1]) plt.xticks(rotation=0) + if isinstance(x_index, pd.DatetimeIndex): + plt.xticks( + range(i.data.index.size), + x_index.to_series().dt.year) +fig.tight_layout() plt.savefig('insights.svg') +tki.save('insights.pkl') diff --git a/insights.svg b/insights.svg index 2764f1a..2e44ec0 100644 --- a/insights.svg +++ b/insights.svg @@ -6,7 +6,7 @@ - 2022-08-03T19:09:24.614908 + 2022-08-23T10:11:55.507640 image/svg+xml @@ -30,10 +30,10 @@ z - @@ -41,17 +41,17 @@ z - - + - + - + - + @@ -139,12 +139,12 @@ z - + - + - + - + - + - - + - + - + - + - + - + @@ -427,12 +427,12 @@ z - + - + @@ -443,12 +443,12 @@ z - + - + @@ -458,12 +458,12 @@ z - + - + @@ -473,12 +473,12 @@ z - + - + @@ -488,12 +488,12 @@ z - + - + @@ -503,12 +503,12 @@ z - + - + @@ -517,7 +517,7 @@ z - + - + - + - - - - - + - + - - - + @@ -1333,14 +1333,14 @@ L 542.553493 358.698437 - - + - @@ -1401,12 +1401,12 @@ z - + - + @@ -1417,12 +1417,12 @@ z - + - + @@ -1433,12 +1433,12 @@ z - + - + @@ -1449,12 +1449,12 @@ 
z - + - + @@ -1464,7 +1464,7 @@ z - + @@ -1476,12 +1476,12 @@ z - + - + @@ -1491,12 +1491,12 @@ z - + - + @@ -1504,12 +1504,12 @@ z - + - + @@ -1518,12 +1518,12 @@ z - + - + @@ -1532,12 +1532,12 @@ z - + - + @@ -1545,7 +1545,7 @@ z - + @@ -1575,42 +1575,42 @@ z - + - + - - - - - + - + @@ -1743,27 +1743,27 @@ z - - - + @@ -1777,14 +1777,14 @@ L 1034.906434 358.698437 - - + @@ -1806,10 +1806,10 @@ L 1034.906434 373.376562 - @@ -1817,12 +1817,12 @@ z - + - + @@ -1833,12 +1833,12 @@ z - + - + @@ -1849,12 +1849,12 @@ z - + - + @@ -1865,12 +1865,12 @@ z - + - + @@ -1881,12 +1881,12 @@ z - + - + @@ -1896,7 +1896,7 @@ z - + @@ -1908,12 +1908,12 @@ z - + - + @@ -1923,12 +1923,12 @@ z - + - + @@ -1938,12 +1938,12 @@ z - + - + @@ -1953,12 +1953,12 @@ z - + - + @@ -1968,12 +1968,12 @@ z - + - + @@ -1982,7 +1982,7 @@ z - + @@ -1996,44 +1996,44 @@ z - + - + - - - - - + @@ -2088,7 +2088,7 @@ L 1620 345.6 - + @@ -2136,27 +2136,27 @@ L 1620 345.6 - - - + @@ -2170,14 +2170,14 @@ L 1527.259375 358.698437 - - + @@ -2199,10 +2199,10 @@ L 1527.259375 373.376562 - @@ -2210,12 +2210,12 @@ z - + - + @@ -2226,12 +2226,12 @@ z - + - + @@ -2242,12 +2242,12 @@ z - + - + @@ -2258,12 +2258,12 @@ z - + - + @@ -2273,7 +2273,7 @@ z - + @@ -2285,12 +2285,12 @@ z - + - + @@ -2300,12 +2300,12 @@ z - + - + @@ -2315,12 +2315,12 @@ z - + - + @@ -2330,12 +2330,12 @@ z - + - + @@ -2345,12 +2345,12 @@ z - + - + @@ -2360,12 +2360,12 @@ z - + - + @@ -2375,12 +2375,12 @@ z - + - + @@ -2389,7 +2389,7 @@ z - + @@ -2419,42 +2419,42 @@ z - + - + - - - - - + - + @@ -2625,27 +2625,27 @@ z - - - + @@ -2659,14 +2659,14 @@ L 566.21443 686.298437 - - + @@ -2683,10 +2683,10 @@ L 566.21443 700.976562 - @@ -2694,12 +2694,12 @@ z - + - + - + - + @@ -2737,12 +2737,12 @@ z - + - + - + - + - + @@ -2809,12 +2809,12 @@ z - + - + @@ -2824,12 +2824,12 @@ z - + - + @@ -2839,12 +2839,12 @@ z - + - + @@ -2854,12 +2854,12 @@ z - + - + - + - + - + - + @@ -2971,7 +2971,7 @@ z - + @@ -2985,42 +2985,42 @@ z - + - + - - - - - + @@ -3075,7 
+3075,7 @@ L 1127.647059 673.2 - + @@ -3124,27 +3124,27 @@ L 1127.647059 673.2 - - - + @@ -3158,14 +3158,14 @@ L 1034.906434 686.298437 - - + @@ -3187,10 +3187,10 @@ L 1034.906434 700.976562 - @@ -3198,12 +3198,12 @@ z - + - + @@ -3214,12 +3214,12 @@ z - + - + @@ -3230,44 +3230,28 @@ z - + - - + + - + - + - - - - - - - - - - - - - - - - - + @@ -3275,9 +3259,9 @@ z - + - + @@ -3287,134 +3271,143 @@ z - + - + - - - - - - + + + + + + - + - + - - - - - - + + + + - + - + - - - + + + - - + - + - + - - - - - - + + + + + - + - + - - - - - - + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - + - - - - - - - + + + @@ -3429,46 +3422,47 @@ L 1620 673.2 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + @@ -3510,33 +3504,53 @@ L 1620 673.2 - - + + + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -3549,15 +3563,15 @@ L 1527.259375 686.298437 - - + - + - + @@ -3579,39 +3593,55 @@ L 1527.259375 700.976562 - + + + + + + + + + + + + + + + + - + - - + + - + - + - + @@ -3622,38 +3652,22 @@ z - + - - - - - - - - - - - - - - - - - - + + - + - + - + @@ -3663,14 +3677,14 @@ z - + - + - + - + @@ -3678,63 +3692,63 @@ z - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + @@ -3763,43 +3777,43 @@ z + + + - - - - + - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -3934,27 +3937,27 @@ z - - - + - + - + @@ -3967,53 +3970,48 @@ L 542.553493 1013.898437 - - + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + - - - + + - + - + - + @@ -4021,15 +4019,15 @@ z - - + + - + - + - + @@ -4037,41 +4035,41 @@ z - - + + - + - - - + + + - + - - + + - + - - - + + + - + - + - + @@ -4081,78 +4079,147 @@ z + + + + + + + + + + + + + + + + - + - - + + - + + - + - + - - 
- + + + + + + - + - + - - - - + + + + + + - + - + - - - - + + + + + - + - + - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + @@ -4181,87 +4248,97 @@ z - - + + - - + + - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + @@ -4306,50 +4383,66 @@ L 1127.647059 1000.8 - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -4362,48 +4455,53 @@ L 746.352941 1013.898437 - - + - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + - - - + + - + - + - + @@ -4411,15 +4509,15 @@ z - - + + - + - + - + @@ -4427,41 +4525,41 @@ z - - + + - + - - - + + + - + - - + + - + - - - + + + - + - + - + @@ -4470,15 +4568,15 @@ z - - + + - + - + - + @@ -4486,15 +4584,15 @@ z - - + + - + - + - + @@ -4502,15 +4600,15 @@ z - - + + - + - + - + @@ -4518,15 +4616,15 @@ z - - + + - + - + - + @@ -4534,84 +4632,84 @@ z - - + + - + - + - + - - + + - + - + - + - - + + - + - + - + - - + + - + - + - + - - + + - + - + - + - + - + @@ -4640,97 +4738,87 @@ z - - + + - - + + - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -4814,27 +4902,27 @@ L 1620 1000.8 - - - + - + - + @@ -4847,201 +4935,208 @@ L 1527.259375 1013.898437 - - + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + - + + + + + + + + + + + + + + + + - + - + - - - - + + + + + + + - + - + - - - - + + + + + + + - + - + - - - - + + + + + + + - + - + - - - - + + + + + + + - - - - - - - - + + + + + + + - - + + - + - + - + - - + + - + - - - + + + - + - - + + - + - - - + + + - + - - + + - + - - - + + + - + - - + + - + - - - + + + - - - - - - - - - - - - - - - - + - + - + @@ -5054,167 +5149,158 @@ zz - - + + 
- + - + - + @@ -5297,15 +5394,15 @@ z - - + + - + - + - + @@ -5313,15 +5410,15 @@ z - - + + - + - + - + @@ -5329,9 +5426,9 @@ z - + - + @@ -5340,153 +5437,149 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - + + + - + - + - - - + + + - + - + - - - + + + - + - + - - - + + + - + - + - - - + + + - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -5510,43 +5603,45 @@ z - - + + - - + + - - - - - - - + + + @@ -5580,18 +5675,18 @@ L 1127.647059 1328.4 - - + + - - + + - - + + @@ -5648,54 +5743,33 @@ L 1127.647059 1328.4 - - - - - - - - - - - - - - - - - - - - - - - + + - - - + - + - + @@ -5708,15 +5782,15 @@ L 1058.567371 1341.498437 - - + - + - + @@ -5733,23 +5807,23 @@ L 1058.567371 1356.176562 - - - + + - + - + - + @@ -5757,15 +5831,15 @@ z - - + + - + - + - + @@ -5773,15 +5847,15 @@ z - - + + - + - + - + @@ -5789,15 +5863,15 @@ z - - + + - + - + - + @@ -5805,15 +5879,15 @@ z - - + + - + - + - + @@ -5821,9 +5895,9 @@ z - + - + @@ -5832,302 +5906,371 @@ zz - - + + - + - + - + @@ -6151,15 +6294,15 @@ z - - + + - + - + - + @@ -6167,15 +6310,15 @@ z - - + + - + - + - + @@ -6183,15 +6326,15 @@ z - - + + - + - + - + @@ -6199,9 +6342,9 @@ z - + - + @@ -6210,144 +6353,99 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - + + + + - - - - - - - - - - - - - - - - - + + - + - - - - + + + + - + - - + + - + - - - - + + + + - + - - + + - + - - - - + + + + - + - - + + - + - - - - + + + + - + - - + + - + - - - - + + + + - + - + @@ -6376,243 +6474,202 @@ zz - + - + - + @@ -6637,40 +6694,40 @@ z - + - + - - + + - + - + - + - - + + - + - + @@ -6679,99 +6736,148 @@ z - - + + - + - - - - - + + + + + + - - + + - + - - - - - + + + + + + - - + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + + + + + + + + + + + + + + + + - - - 
- + + + + - + - - + + - + - - - - + + + + - + - - + + - + - - - - + + + + - + - + @@ -6800,102 +6906,99 @@ z - - + + + + + - - - - - - - - - - + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + @@ -6947,39 +7050,59 @@ z - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -6992,75 +7115,69 @@ L 1072.973621 1669.098437 + + + + + + + + + + + + + + + + + + + + + + + - - + - + - - - + + + - + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - + - + @@ -7068,248 +7185,245 @@ zz - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - + - - - - - - - + + + + + - + - + - - - - - - + + + + + - + - + - - - - - - + + + + + - + - + - - - + + + - - + - + - + - - - + + + - - + - + - + - - - + + + - - + + + + + + + + + + + + + + + - + - + @@ -7679,43 +7762,45 @@ z - - + + - - + + - - - - - + - + @@ -7770,8 +7855,8 @@ L 635.294118 1983.6 - - + + @@ -7816,66 +7901,51 @@ L 635.294118 1983.6 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -7888,15 +7958,15 @@ L 542.553493 1996.698437 - - + - + - + @@ -7918,23 +7988,23 @@ L 542.553493 2011.376563 - - - + + - + - + - + @@ -7942,15 +8012,15 @@ z - - + + - + - + - + @@ -7958,31 +8028,15 @@ z - - - - - - - - - - - - - - - - - + - + - + - + @@ -7991,14 +8045,14 @@ z - + - + - + - + @@ -8006,9 +8060,9 @@ z - + - + @@ -8017,130 +8071,214 @@ z - - - - - - - - - - - - - - - + - - - - + + + + + + - + - - - - + + + + + + - + - - - - + + + + + - + - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + - - + + - - - - - - - + + 
+ @@ -8156,47 +8294,46 @@ L 1127.647059 1983.6 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + @@ -8238,33 +8375,53 @@ L 1127.647059 1983.6 - - + + + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -8277,15 +8434,15 @@ L 1034.906434 1996.698437 - - + - + - + @@ -8307,23 +8464,23 @@ L 1034.906434 2011.376563 - - + - + - + - + @@ -8332,14 +8489,14 @@ z - + - + - + - + @@ -8348,46 +8505,46 @@ z - + - + - - - + + + - + - + - + - - - + + + - + - + - + - + - + @@ -8395,9 +8552,9 @@ z - + - + @@ -8406,107 +8563,121 @@ z - - + + - + - - - - - + + + + + + + - - + + - + - - - - - + + + + + + + - - + + - + - - - - - + + + + + + + - - + + - + - - - - - + + + + + + + - - + + - + - - - - - + + + + + + + - - + + - + - - - - - + + + + + + + - - + + - + - - - - - + + + + + + + - + - + @@ -8535,45 +8706,45 @@ z - - + + - - + + - - - - - + - + @@ -8628,8 +8799,8 @@ L 1620 1983.6 - - + + @@ -8674,51 +8845,52 @@ L 1620 1983.6 - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -8731,15 +8903,15 @@ L 1527.259375 1996.698437 - - + - + - + @@ -8761,81 +8933,81 @@ L 1527.259375 2011.376563 - - + - + - - - + + + - + - + - + - - - + + + - + - + - + - - - + + + - + - + - + - - - + + + - + - + - + @@ -8844,15 +9016,15 @@ z - - + + - + - + - + @@ -8860,15 +9032,15 @@ z - - + + - + - + - + @@ -8876,75 +9048,75 @@ z - - + + - + - + - + - - + + - + - + - + - - + + - + - + - + - - + + - + - + - + - - + + - + - + - + @@ -8952,15 +9124,15 @@ z - - + + - + - + - + @@ -8968,15 +9140,15 @@ z - - + + - + - + - + @@ -8984,9 +9156,9 @@ z - + - + @@ -9015,98 +9187,88 @@ z - - + + - - + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -9174,27 +9336,27 @@ L 635.294118 2311.2 - - 
- + - + - + @@ -9207,127 +9369,106 @@ L 542.553493 2324.298437 - - + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + - - + - + - - - + + + - + - + - + - - - + + + - + - + - + - - - + + + - + - - - - - - - - - - - - - - - - - + - + - - - + + + - + - + - + @@ -9336,121 +9477,114 @@ z - - + + - + - - - - + + + + - - + - - + + - + - - - - + + + + - - + - - + + - + - - - - + + + + - - + - - + + - + - - - - + + + + - - + - - + + - + - - - - + + + + - - + - - + + - + - - - - + + + + - - + - - + + - + - - - - + + + + - - + - + - + @@ -9478,102 +9612,89 @@ z - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - + + @@ -9618,52 +9739,67 @@ L 1127.647059 2311.2 - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -9676,53 +9812,48 @@ L 1034.906434 2324.298437 - - + - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + - - - + + - + - + - + @@ -9730,15 +9861,15 @@ z - - + + - + - + - + @@ -9746,15 +9877,15 @@ z - - + + - + - + - + @@ -9762,15 +9893,15 @@ z - - + + - + - + - + @@ -9778,9 +9909,9 @@ z - + - + @@ -9789,149 +9920,114 @@ z - - + + - + - - - - - - - + + + + + + - - + + - + - - - - - - - + + + + + + - - + + - + - - - - + + + + - - + + - + - + - + - - + + - + - - - - + + + + - - + + - + - - - - + + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + - + - - - - - - - + + + + + + - + - + @@ -9960,43 +10056,43 @@ z - - + + - - + + - - - - - + - + @@ -10040,8 +10136,8 @@ L 1620 2311.2 - - + + @@ -10103,33 +10199,49 @@ L 1620 2311.2 - - + + + + + + + + + + + + + + + + + + - - - + - + - + @@ -10142,15 +10254,15 @@ L 1238.705882 2324.298437 - - + - + - + @@ -10167,68 +10279,68 @@ L 1238.705882 2338.976562 - - + + - - + + - - + 
+ - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + - - + + diff --git a/requirements.txt b/requirements.txt index 566db9f..1bd3839 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ matplotlib==3.5.* numpy==1.23.* pandas==1.4.* scipy==1.9.* +dash==2.6.* diff --git a/setup.py b/setup.py index c88a2fe..bd86287 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ packages=['tki'], include_package_data=True, python_requires=">=3.8.*", - install_requires=['numpy', 'pandas', 'scipy', 'matplotlib'], + install_requires=['numpy', 'pandas', 'scipy', 'matplotlib', 'dash'], license=about['__license__'], zip_safe=False, keywords='insights' diff --git a/tki/__init__.py b/tki/__init__.py index 9ead1c1..5490587 100644 --- a/tki/__init__.py +++ b/tki/__init__.py @@ -4,3 +4,4 @@ https://github.com/Der-Henning/TopK-Insights """ from tki.tki import TKI +from tki.app import App diff --git a/tki/__version__.py b/tki/__version__.py index 4269ec8..f2cbddd 100644 --- a/tki/__version__.py +++ b/tki/__version__.py @@ -2,7 +2,7 @@ __title__ = 'TKI' __description__ = 'TKI - Top-K Insights' -__version__ = '0.3.0' +__version__ = '0.4.0' __author__ = 'Henning Merklinger' __author_email__ = 'henning.merklinger@gmail.com' __license__ = 'MIT' diff --git a/tki/aggregators.py b/tki/aggregators.py index e396525..941bf60 100644 --- a/tki/aggregators.py +++ b/tki/aggregators.py @@ -28,7 +28,7 @@ def __init__(self, measurement: Dimension): self.measurement = measurement def agg(self, group: pd.core.groupby.DataFrameGroupBy) -> pd.DataFrame: - """Applys aggregation function to a grouped DataFrame. + """Applies aggregation function to a grouped DataFrame. 
Arguments --------- diff --git a/tki/app.py b/tki/app.py new file mode 100644 index 0000000..3edbe11 --- /dev/null +++ b/tki/app.py @@ -0,0 +1,170 @@ +"""Module containing a dash application as interface for tki""" +from typing import NoReturn, List, Tuple, Any +import base64 +import io +import pickle +import pandas as pd +import dash +from dash import Dash, html, dcc, Output, Input, dash_table, ALL +from dash.exceptions import PreventUpdate + +from tki.insights import InsightResult + +class Upload(dcc.Upload): + """Formatted Upload control""" + def __init__(self, *args, **kwargs): + super().__init__( + children=html.Div([ + 'Drag and Drop or ', + html.A('Select File') + ]), + style={ + 'width': '90%', + 'height': '60px', + 'lineHeight': '60px', + 'borderWidth': '1px', + 'borderStyle': 'dashed', + 'borderRadius': '5px', + 'textAlign': 'center', + 'margin': '10px auto' + }, + *args, **kwargs) + +class App(): + """Web application providing an interface for TKI + + Parameters + ---------- + port : int + Port used to serve the application. Defaults to 8050 + debug : bool + Turn on debug mode. Defaults to False + """ + # TODO: + # Currently working: Upload and visualize Insight Results + # Create Interface to initialize TKI and run the calculation + # Global variables = bad. Only for single user sessions. + # For multi session capability probably a database (Redis?) is needed. 
+ def __init__(self, port: int = 8050, debug: bool = False) -> None: + self._port = port + self._debug = debug + + # global variables + self._data: pd.DataFrame = None + self._insights: List[InsightResult] = None + self._dim_setting = {} + + # Initialize Dash Application + self._app = Dash("TKI") + self._app.title = "Top-K Insights" + + # 2-Tab Layout + self._app.layout = html.Div([ + html.H1("Top-K Insights"), + dcc.Tabs([ + dcc.Tab(label='Analysis', children=[ + html.Div([ + html.H4('Upload Data'), + Upload( + id='upload-data', + multiple=False), + dash_table.DataTable( + id='data-table', + style_table={'overflowX': 'auto'}, + column_selectable="multi", + page_current=0, + page_size=10), + ]), + html.Div(id='dimension-selector'), + html.Button('Calculate Insights', id='calc-insights') + ]), + dcc.Tab(label='Results', children=[ + html.H4('Upload Insight Result file'), + Upload( + id='upload-results', + multiple=False), + html.Div(id='insight-container') + ]) + ]) + ]) + + # Callbacks + # Upload data as .csv + self._app.callback( + [Output('data-table', 'data'), + Output('data-table', 'columns')], + Input('upload-data', 'contents') + )(self._upload_data) + + # Manage selected dimensions + self._app.callback( + Output('dimension-selector', 'children'), + Input('data-table', 'selected_columns') + )(self._update_dimension_selector) + + # On dimension dropdown select save selection + self._app.callback( + Output('insight-container', 'style'), + Input({'type': 'dimension-dropdown', 'index': ALL}, 'value') + )(self._update_dimension_dropdown) + + # Upload Insight Results as .pkl + self._app.callback( + Output('insight-container', 'children'), + Input('upload-results', 'contents') + )(self._update_graphs) + + def _upload_data(self, content: str) -> Tuple[dict, dict]: + if content is None: + raise PreventUpdate + _, content_string = content.split(',') + decoded = base64.b64decode(content_string) + self._data = pd.read_csv(io.BytesIO(decoded)) + return 
(self._data.to_dict('records'), + [{"name": i, "id": i, "selectable": True} for i in self._data.columns]) + + def _update_dimension_selector(self, columns: List[str] + ) -> List[dash.development.base_component.Component]: + if columns is None: + raise PreventUpdate + dims = [html.Div([ + html.Label(col), + dcc.Dropdown( + ['Nominal', 'Cardinal'], + self._dim_setting[col] if col in self._dim_setting else None, + id={'type': 'dimension-dropdown', 'index': col}) + ]) for col in columns] + for key in list(self._dim_setting.keys()): + if not key in columns: + del self._dim_setting[key] + return [html.H4('Edit Dimensions'), *dims] + + def _update_dimension_dropdown(self, _: List[Any]) -> None: + for dropdown in dash.callback_context.inputs_list[0]: + self._dim_setting[dropdown['id']['index']] = dropdown['value'] + raise PreventUpdate + + def _update_graphs(self, content: str) -> List[html.Div]: + if content is None: + raise PreventUpdate + _, content_string = content.split(',') + decoded = base64.b64decode(content_string) + self._insights = pickle.load(io.BytesIO(decoded)) + return self._generate_insight_figures() + + def _generate_insight_figures(self) -> List[html.Div]: + return [html.Div([ + html.H4(f"{idx}) {type(insight.insight).__name__} - " + f"Score: {insight.score:.2f}"), + html.Div( + f"{(insight.sibling_group, insight.composite_extractor)}"), + dcc.Graph(figure=insight.fig) + ]) for idx, insight in enumerate(self._insights, 1)] + + def run(self) -> NoReturn: + """Starts webserver""" + self._app.run(debug=self._debug, port=self._port) + +if __name__ == "__main__": + app = App() + app.run() \ No newline at end of file diff --git a/tki/dimensions.py b/tki/dimensions.py index 74a5efe..7fae7b9 100644 --- a/tki/dimensions.py +++ b/tki/dimensions.py @@ -11,7 +11,7 @@ class Dimension(): ---------- name : str Name of column in Dataset - dependent_dimensions : List[Dimension] + dependent_dimensions : List[str] List of dependent dimensions. e.g. 
Country -> Region value : str Current value reducing the Subspace. @@ -24,7 +24,7 @@ class Dimension(): def __init__(self, name: str, - dependent_dimensions: List[Dimension] = None, + dependent_dimensions: List[str] = None, value: str = '*'): self.name = name self.dependent_dimensions = \ @@ -64,7 +64,7 @@ class NominalDimension(Dimension): ---------- name : str Name of column in Dataset - dependent_dimensions : List[Dimension] + dependent_dimensions : List[str] List of dependent dimensions. e.g. Country -> Region value : str Current value reducing the Subspace. @@ -80,7 +80,7 @@ class OrdinalDimension(NominalDimension): ---------- name : str Name of column in Dataset - dependent_dimensions : List[Dimension] + dependent_dimensions : List[str] List of dependent dimensions. e.g. Country -> Region value : str Current value reducing the Subspace. @@ -96,7 +96,7 @@ class CardinalDimension(OrdinalDimension): ---------- name : str Name of column in Dataset - dependent_dimensions : List[Dimension] + dependent_dimensions : List[str] List of dependent dimensions. e.g. Country -> Region value : str Current value reducing the Subspace. @@ -109,7 +109,7 @@ class CardinalDimension(OrdinalDimension): def __init__(self, name: str, - dependent_dimensions: List[Dimension] = None, + dependent_dimensions: List[str] = None, value: str = '*', bins: int = 10): super().__init__(name, dependent_dimensions, value) @@ -128,7 +128,7 @@ class TemporalDimension(CardinalDimension): ---------- name : str Name of column in Dataset - dependent_dimensions : List[Dimension] + dependent_dimensions : List[str] List of dependent dimensions. e.g. Country -> Region value : str Current value reducing the Subspace. 
@@ -147,7 +147,7 @@ class TemporalDimension(CardinalDimension): def __init__(self, name: str, - dependent_dimensions: List[Dimension] = None, + dependent_dimensions: List[str] = None, value: str = '*', bins: int = 0, date_format: 'str' = None, @@ -157,8 +157,8 @@ def __init__(self, self.freq = freq def preprocess(self, data: pd.Series) -> pd.Series: - if not self.date_format: - return super().preprocess(data) + # if not self.date_format: + # return super().preprocess(data) return super().preprocess( pd.to_datetime(data, format=self.date_format)) diff --git a/tki/insights.py b/tki/insights.py index 8dcd28c..af32fa2 100644 --- a/tki/insights.py +++ b/tki/insights.py @@ -5,6 +5,7 @@ from typing import Union, Callable from scipy.stats import rv_continuous, norm, logistic, t, linregress, pearsonr from scipy.optimize import curve_fit +import plotly.graph_objects as go import matplotlib.pyplot as plt import pandas as pd import numpy as np @@ -91,6 +92,12 @@ def plot(self) -> None: """Visualizes the insight result using matplotlib""" self.insight.plot(self) + @property + def fig(self) -> go.Figure: + """Returns a plotly.graph_objects.Figure containing + a visualization of the Insight Result""" + return self.insight.fig(self) + class Insight(): """Parent class for Insights""" @@ -137,6 +144,39 @@ def plot(self, result: InsightResult) -> None: f"{(result.sibling_group, result.composite_extractor)}") plt.legend() + def fig(self, result: InsightResult) -> go.Figure: + """Creates a Visualization of the Insight Result + + Arguments + --------- + result : InsightResult + + Returns + ------- + plotly.graph_objects.Figure + """ + derived_measure = len(result.composite_extractor.extractors) > 0 + fig = go.Figure( + layout=go.Layout( + xaxis = { + 'tickmode': 'array', + 'tickvals': list(range(1, len(result.data.values) + 1)), + 'ticktext': result.data.index.get_level_values( + result.data.index.names[-1]), + 'title': result.data.index.names[-1]}, + yaxis = { + 'title': f"{'Derived 
measure ' if derived_measure else ''}" + f"{result.composite_extractor.aggregator.measurement}"}, + legend={ + 'orientation': "h", + 'yanchor': "bottom", 'y': 1.02, + 'xanchor': "right", 'x': 1 } )) + fig.add_trace(go.Scatter( + name=str(result.sibling_group.subspace), + x=list(range(1, len(result.data.values) + 1)), + y=result.data.values, showlegend=True)) + return fig + class PointInsight(Insight): """Parent class for Point Insights""" @@ -166,6 +206,34 @@ def plot(self, result: InsightResult) -> None: plt.title(f"{type(self).__name__} - score: {result.score:.2f}") plt.legend() + def fig(self, result: InsightResult) -> go.Figure: + derived_measure = len(result.composite_extractor.extractors) > 0 + x_data = list(range(1, result.data.columns.size + 1)) + fig = go.Figure( + layout=go.Layout( + xaxis = { + 'tickmode': 'array', + 'tickvals': x_data, + 'ticktext': result.data.columns.get_level_values( + result.data.columns.names[-1]), + 'title': result.data.columns.names[-1]}, + yaxis = { + 'title': f"{'Derived measure ' if derived_measure else ''}" + f"{result.composite_extractor.aggregator.measurement}"}, + legend={ + 'orientation': "h", + 'yanchor': "bottom", 'y': 1.02, + 'xanchor': "right", 'x': 1} )) + for loc, row in result.data.iterrows(): + result.sibling_group.subspace.set( + result.sibling_group.dividing_dimension, loc) + fig.add_trace(go.Scatter( + name=str(result.sibling_group.subspace), + x=x_data, y=row.values)) + result.sibling_group.subspace.set( + result.sibling_group.dividing_dimension, '*') + return fig + class OutstandingFirstInsight(PointInsight): """By predicting a given distribution for the values sorted descending @@ -218,6 +286,11 @@ def calc_insight(self, extraction_result: ExtractionResult) -> InsightResult: residuals = ydata - prediction # Fit location parameters of the distribution of residuals + # TODO: + # scipy >= 1.9.0 provides a new fit function + # scipy.stats.fit(dist, data) -> FitResult function + # Resulting params don't make 
sense though... + # fitResult = stats.fit(self.stat_dist, residuals[1:]) loc, scale = self.stat_dist.fit(residuals[1:]) dist_params = {'loc': loc, 'scale': scale} @@ -242,6 +315,15 @@ def plot(self, result: InsightResult) -> None: plt.plot(result.prediction, label="null hypothesis") plt.legend() + def fig(self, result: InsightResult) -> go.Figure: + fig = super().fig(result) + fig.add_trace(go.Scatter( + name="Null-Hypothesis", + x=list(range(1, len(result.data.values) + 1)), + y=result.prediction, + mode="lines", line={'color': "orange"})) + return fig + class OutstandingLastInsight(PointInsight): """By predicting a given distribution for the values sorted descending @@ -273,6 +355,12 @@ def calc_insight(self, extraction_result: ExtractionResult) -> InsightResult: ydata = data.values xdata = range(1, ydata.size + 1) + diff = data.diff() + if diff.sum() == 0: + raise InsightError("OutstandingLastInsight: No variance") + if diff[-1] / diff.sum() < 1 / data.size: + raise InsightError("OutstandingLastInsight: Too low variance on tail") + # Linear distributions lead to a perfect fit and a high score # Insight is meaningless -> return None if (np.unique(np.diff(ydata)).size == 1) or \ @@ -318,6 +406,15 @@ def plot(self, result: InsightResult) -> None: plt.plot(result.prediction, label="null hypothesis") plt.legend() + def fig(self, result: InsightResult) -> go.Figure: + fig = super().fig(result) + fig.add_trace(go.Scatter( + name="Null-Hypothesis", + x=list(range(1, len(result.data.values) + 1)), + y=result.prediction, + mode="lines", line={'color': "orange"})) + return fig + class EvennessInsight(PointInsight): """The Evenness Insight returns a score describing the evenness of @@ -339,12 +436,12 @@ def calc_insight(self, extraction_result: ExtractionResult) -> InsightResult: raise InsightError("EvennessInsight: data contains values < 0") # Prevent dividing by zero - sum = np.sum(data.values) - if sum == 0: + data_sum = np.sum(data.values) + if data_sum == 0: raise 
InsightError("EvennessInsight: Sum of values is zero") # Calculate the Shannon-Index - p = data.values / np.sum(data.values) + p = data.values / data_sum shannon = -np.sum(p * np.log(p, out=np.zeros_like(p), where=(p != 0))) / np.log(data.size) @@ -417,15 +514,12 @@ def calc_insight(self, extraction_result: ExtractionResult) -> InsightResult: not extraction_result["sibling_group"].dividing_dimension.is_temporal: raise InsightError("TrendInsight: dimension is not temporal") - # Scaling factor for normalization - scale = (np.max(data.values) - np.min(data.values)) / data.values.size - # Fit linear regression on the data result = linregress( x=range(data.values.size), - y=data.values if scale == 0 else data.values / scale) - slope = result.slope * scale - intercept = result.intercept * scale + y=data.values) + slope = result.slope + intercept = result.intercept r_value = result.rvalue # Calculate the p-value for the slope on the given distribution @@ -455,6 +549,15 @@ def plot(self, result: InsightResult) -> None: plt.plot(y_data, label="regression") plt.legend() + def fig(self, result: InsightResult) -> go.Figure: + fig = super().fig(result) + x_data = np.arange(1, len(result.data.values) + 1) + fig.add_trace(go.Scatter( + name="regression", x=x_data, + y=result.intercept + (x_data - 1) * result.slope, + mode="lines", line={'color': "orange"})) + return fig + class CorrelationInsight(CompoundInsight): """Correlation Insights measure the correlation between two series. 
diff --git a/tki/tki.py b/tki/tki.py index bb4e540..183e61f 100644 --- a/tki/tki.py +++ b/tki/tki.py @@ -197,8 +197,8 @@ def _extract(self, subspace: Subspace, compound: bool = False # block dependent dimensions if (len(dimension_pair) > 1 and - (dimension_pair[0] in dimension_pair[1].dependent_dimensions or - dimension_pair[1] in dimension_pair[0].dependent_dimensions)): + (dimension_pair[0].name in dimension_pair[1].dependent_dimensions or + dimension_pair[1].name in dimension_pair[0].dependent_dimensions)): log.debug('Skipping dependent Dimensions %s', dimension_pair) continue try: