From 981ba2aaf0f215db6cbe36df1c8c33e7aa830b9c Mon Sep 17 00:00:00 2001
From: foolcage <5533061@qq.com>
Date: Sun, 8 Oct 2023 17:29:27 +0800
Subject: [PATCH] Report with big picture

---
 examples/hot.json               | 43 +++++++++++----
 examples/report_utils.py        | 28 +++++++---
 examples/reports/report_tops.py | 14 +++--
 examples/utils.py               | 93 +++++++++++++++++++++++----------
 src/zvt/api/stats.py            | 24 ++++++---
 5 files changed, 145 insertions(+), 57 deletions(-)

diff --git a/examples/hot.json b/examples/hot.json
index d3427dbf..563d00ac 100644
--- a/examples/hot.json
+++ b/examples/hot.json
@@ -1,7 +1,29 @@
 {
+  "减肥药": [
+    "减肥药"
+  ],
+  "房地产": [
+    "房地产",
+    "新型城镇化",
+    "棚改",
+    "建材"
+  ],
+  "新型工业化": [
+    "新型工业化",
+    "工业母机"
+  ],
+  "华为": [
+    "华为",
+    "mate60 pro,mate",
+    "星闪",
+    "问界",
+    "麒麟",
+    "昇腾",
+    "鸿蒙"
+  ],
   "新能源": [
     "新能源",
-    "锂电 锂电池",
+    "锂电,锂电池",
     "钠离子电池",
     "光伏",
     "太阳能",
@@ -11,34 +33,35 @@
     "核电"
   ],
   "新能车": [
-    "新能车 新能源汽车",
-    "整车 汽车整车",
-    "汽车零部件 汽车零件",
+    "新能车,新能源汽车",
+    "整车,汽车整车",
+    "汽车零部件,汽车零件",
     "无人驾驶",
-    "压铸一体化 一体化压铸"
+    "压铸一体化,一体化压铸"
   ],
   "人工智能": [
-    "人工智能 AI",
-    "GPT CHATGPT",
+    "人工智能,AI",
+    "GPT,CHATGPT",
     "算力"
   ],
   "机器人": [
     "机器人",
     "减速器",
-    "伺服 伺服系统",
+    "伺服,伺服系统",
     "控制系统",
     "电机"
   ],
   "核心资产": [
     "核心资产",
+    "消费,白酒,食品,饮料",
     "白马",
     "沪深300",
     "基金重仓",
     "上证50"
   ],
-  "人民币国际化": [
-    "人民币国际化",
+  "一带一路": [
     "一带一路",
+    "人民币国际化",
     "跨境支付"
   ]
 }
diff --git a/examples/report_utils.py b/examples/report_utils.py
index 8b66594c..d23a1dca 100644
--- a/examples/report_utils.py
+++ b/examples/report_utils.py
@@ -3,7 +3,7 @@
 import time
 from typing import Type
 
-from examples.utils import add_to_eastmoney
+from examples.utils import add_to_eastmoney, group_stocks_by_topic, msg_group_stocks_by_topic
 from zvt import zvt_config
 from zvt.api import get_top_volume_entities, TopType
 from zvt.api.kdata import get_latest_kdata_date, get_kdata_schema, default_adjust_type
@@ -18,7 +18,15 @@
 
 
 def inform(
-    action: EmailInformer, entity_ids, target_date, title, entity_provider, entity_type, em_group, em_group_over_write
+    action: EmailInformer,
+    entity_ids,
+    target_date,
+    title,
+    entity_provider,
+    entity_type,
+    em_group,
+    em_group_over_write,
+    group_by_topic=True,
 ):
     msg = "no targets"
     if entity_ids:
@@ -36,8 +44,12 @@ def inform(
                 f"{target_date} {title} error: {e}",
             )
 
-        infos = [f"{entity.name}({entity.code})" for entity in entities]
-        msg = "\n".join(infos) + "\n"
+        if group_by_topic and (entity_type == "stock"):
+            msg = msg_group_stocks_by_topic(entities=entities, threshold=1, days_ago=60)
+        else:
+            infos = [f"{entity.name}({entity.code})" for entity in entities]
+            msg = "\n".join(infos) + "\n"
+
 
     logger.info(msg)
     action.send_message(zvt_config["email_username"], f"{target_date} {title}", msg)
@@ -126,7 +138,7 @@ def report_targets(
             informer,
             entity_ids=long_stocks,
             target_date=target_date,
-            title=title,
+            title=f"{entity_type} {title}({len(long_stocks)})",
             entity_provider=entity_provider,
             entity_type=entity_type,
             em_group=em_group,
@@ -159,12 +171,16 @@ def report_top_entities(
     turnover_threshold=100000000,
     turnover_rate_threshold=0.02,
     informer: EmailInformer = None,
+    title="最强",
     em_group=None,
     em_group_over_write=True,
     return_type=TopType.positive,
 ):
     error_count = 0
 
+    if not adjust_type:
+        adjust_type = default_adjust_type(entity_type=entity_type)
+
     while error_count <= 10:
         try:
             target_date = get_latest_kdata_date(
@@ -190,7 +206,7 @@ def report_top_entities(
             informer,
             entity_ids=selected,
             target_date=target_date,
-            title=f"{entity_type} {em_group}({len(selected)})",
+            title=f"{entity_type} {title}({len(selected)})",
             entity_provider=entity_provider,
             entity_type=entity_type,
             em_group=em_group,
diff --git a/examples/reports/report_tops.py b/examples/reports/report_tops.py
index 604572c7..d148656c 100644
--- a/examples/reports/report_tops.py
+++ b/examples/reports/report_tops.py
@@ -31,6 +31,7 @@ def report_top_stocks():
         turnover_threshold=0,
         turnover_rate_threshold=0,
         informer=email_informer,
+        title="短期最强",
         em_group="短期最强",
         em_group_over_write=True,
         return_type=TopType.positive,
@@ -48,6 +49,7 @@ def report_top_stocks():
         turnover_threshold=0,
         turnover_rate_threshold=0,
         informer=email_informer,
+        title="中期最强",
         em_group="中期最强",
         em_group_over_write=True,
         return_type=TopType.positive,
@@ -89,6 +91,7 @@ def report_top_blocks():
         turnover_rate_threshold=0,
         informer=email_informer,
         em_group="最强行业",
+        title="最强行业",
         em_group_over_write=True,
         return_type=TopType.positive,
         entity_ids=entity_ids,
@@ -110,6 +113,7 @@ def report_top_blocks():
         turnover_rate_threshold=0,
         informer=email_informer,
         em_group="最强概念",
+        title="最强概念",
         em_group_over_write=True,
         return_type=TopType.positive,
         entity_ids=entity_ids,
@@ -123,14 +127,15 @@ def report_top_stockhks():
         entity_provider="em",
         data_provider="em",
         top_count=10,
-        periods=[*range(2, 27)],
+        periods=[*range(1, 15)],
         ignore_new_stock=False,
         ignore_st=False,
         adjust_type=None,
         turnover_threshold=30000000,
-        turnover_rate_threshold=0.001,
+        turnover_rate_threshold=0.01,
         informer=email_informer,
         em_group="短期最强",
+        title="短期最强",
         em_group_over_write=False,
         return_type=TopType.positive,
     )
@@ -140,14 +145,15 @@ def report_top_stockhks():
         entity_provider="em",
         data_provider="em",
         top_count=10,
-        periods=[30, 60],
+        periods=[30, 50],
         ignore_new_stock=True,
         ignore_st=False,
         adjust_type=None,
         turnover_threshold=30000000,
-        turnover_rate_threshold=0.001,
+        turnover_rate_threshold=0.01,
         informer=email_informer,
         em_group="中期最强",
+        title="中期最强",
         em_group_over_write=False,
         return_type=TopType.positive,
     )
diff --git a/examples/utils.py b/examples/utils.py
index e43f3efb..9923fdc4 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -9,7 +9,7 @@
 
 from zvt.api.stats import get_top_performance_entities_by_periods
 from zvt.contract.api import get_entities
-from zvt.domain import StockNews
+from zvt.domain import StockNews, Stock
 from zvt.utils import next_date, today
 
 logger = logging.getLogger(__name__)
@@ -53,17 +53,41 @@ def hot_stats(data: pd.Series):
     pass
 
 
-def group_stocks_by_topic(entities, start_timestamp=None):
-    # 默认半年内的新闻
+def group_stocks_by_topic(
+    keyword=None, entities=None, hot_words_config=None, start_timestamp=None, days_ago=60, threshold=3
+):
+    """
+    Group stocks by hot topics mined from their recent news titles.
+    :param keyword: optional keyword; only news whose title contains it is considered
+    :param entities: stock entities to group; defaults to all stocks from provider "em"
+    :param hot_words_config: two-level structure of topic -> [word group 1, word group 2, ...],
+        e.g. a valid item is {"华为": ["华为", "mate pro", "星闪", "问界"]}
+    :param start_timestamp: earliest news timestamp; defaults to days_ago days before today
+    :param days_ago: look-back window in days, used when start_timestamp is not given
+    :param threshold: minimum keyword hit count for a stock to be attached to a word group
+    :return: list of (topic, [(word group, [(stock, count), ...]), ...]) tuples
+    """
     if not start_timestamp:
-        start_timestamp = next_date(today(), -180)
+        start_timestamp = next_date(today(), -days_ago)
     stock_map = {}
+
+    entity_ids = None
+    if entities:
+        entity_ids = [entity.entity_id for entity in entities]
+    else:
+        entities = Stock.query_data(provider="em", return_type="domain")
+
     for entity in entities:
         stock_map[entity.entity_id] = {"code": entity.code, "name": entity.name}
-    df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=[entity.entity_id for entity in entities])
+
+    filters = None
+    if keyword:
+        filters = [StockNews.news_title.contains(keyword)]
+    df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=entity_ids, filters=filters)
     df = df.groupby("entity_id")["news_title"].apply(",".join).reset_index()
-    hot_words_config = get_hot_words_config()
+    if not hot_words_config:
+        hot_words_config = get_hot_words_config()
     hot_stocks_map = {}
     topic_count = {}
@@ -73,22 +97,21 @@ def group_stocks_by_topic(entities, start_timestamp=None):
         text = row["news_title"]
         is_hot = False
 
-        # hot words config为二重结构
-        # 即 主题:[分支1,分支2,...]的形式
         for topic in hot_words_config:
             topic_count.setdefault(topic, 0)
             for words in hot_words_config[topic]:
                 hot_stocks_map.setdefault(words, [])
                 word_count.setdefault(words, 0)
-                for word in words.split():
-                    count = text.count(word)
-                    if count > 0:
-                        word_count[words] = word_count[words] + 1
-                        topic_count[topic] = topic_count[topic] + 1
-                        hot_stocks_map[words].append(
-                            (f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count)
-                        )
-                        is_hot = True
+                count = 0
+                for word in words.split(","):
+                    count = text.count(word) + count
+                if count >= threshold:
+                    word_count[words] = word_count[words] + 1
+                    topic_count[topic] = topic_count[topic] + 1
+                    hot_stocks_map[words].append(
+                        (f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count)
+                    )
+                    is_hot = True
         if not is_hot:
             hot_stocks_map.setdefault("其他", [])
             hot_stocks_map["其他"].append((f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", 0))
@@ -106,24 +129,36 @@ def group_stocks_by_topic(entities, start_timestamp=None):
         ]
         result.append((f"{topic}({count})", topic_words_stocks))
 
-    result.append(("其他", [("其他", hot_stocks_map["其他"])]))
+    result.append(("其他", [("其他", hot_stocks_map.get("其他", ""))]))
     return result
 
 
-if __name__ == "__main__":
-    ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")
-
-    entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain")
-
-    group_info = group_stocks_by_topic(entities=entities)
-    info = ""
+def msg_group_stocks_by_topic(
+    keyword=None, entities=None, hot_words_config=None, start_timestamp=None, days_ago=60, threshold=3
+):
+    group_info = group_stocks_by_topic(
+        keyword=keyword,
+        entities=entities,
+        hot_words_config=hot_words_config,
+        start_timestamp=start_timestamp,
+        days_ago=days_ago,
+        threshold=threshold,
+    )
+    msg = ""
     for group in group_info:
         topic = group[0]
-        info = info + f"^^^^^^ {topic} ^^^^^^\n"
+        msg = msg + f"^^^^^^ {topic} ^^^^^^\n"
         for topic_word, stocks_count in group[1]:
-            info = info + f"{topic_word}\n"
+            msg = msg + f"{topic_word}\n"
             stocks = [f"{stock_count[0]} {stock_count[1]}" for stock_count in stocks_count]
-            info = info + "\n".join(stocks) + "\n"
+            msg = msg + "\n".join(stocks) + "\n"
+    return msg
+
+
+if __name__ == "__main__":
+    ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")
+
+    entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain")
 
-    print(info)
+    print(msg_group_stocks_by_topic(entities=entities, threshold=1))
diff --git a/src/zvt/api/stats.py b/src/zvt/api/stats.py
index 85f9190c..66ff4022 100644
--- a/src/zvt/api/stats.py
+++ b/src/zvt/api/stats.py
@@ -113,17 +113,25 @@ def get_top_performance_entities_by_periods(
         filters = [kdata_schema.entity_id.in_(filter_entity_ids)]
     selected = []
     current_start = None
+    real_period = 1
     for i, period in enumerate(periods):
-        start = next_date(target_date, -period)
-        trade_days = get_trade_dates(start=next_date(target_date, -period), end=target_date)
-        if not trade_days:
-            logger.info(f"no trade days in: {start} to {target_date}")
-            continue
-        if current_start and is_same_date(current_start, trade_days[0]):
-            logger.info("ignore same trade days")
-            continue
+        real_period = max(real_period, period)
+        while True:
+            start = next_date(target_date, -real_period)
+            trade_days = get_trade_dates(start=start, end=target_date)
+            if not trade_days:
+                logger.info(f"no trade days in: {start} to {target_date}")
+                real_period = real_period + 1
+                continue
+            if current_start and is_same_date(current_start, trade_days[0]):
+                logger.info("ignore same trade days")
+                real_period = real_period + 1
+                continue
+            break
         current_start = trade_days[0]
         current_end = trade_days[-1]
+
+        logger.info(f"trade days in: {current_start} to {current_end}, real_period: {real_period} ")
         positive_df, negative_df = get_top_performance_entities(
             entity_type=entity_type,
             start_timestamp=current_start,
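
The grouping helpers this patch adds to examples/utils.py can also be exercised outside the report jobs. Below is a minimal sketch, assuming the patched examples package is importable and StockNews data for provider "em" has been recorded locally; custom_hot_words is a hypothetical config used only to illustrate the topic -> ["word,word", ...] shape that hot.json follows, not something shipped by this change.

# Sketch: build the topic-grouped "big picture" message for an ad-hoc stock list.
# Assumption: zvt is installed and em StockNews data is already recorded locally.
from examples.utils import msg_group_stocks_by_topic
from zvt.contract.api import get_entities

# Hypothetical config in the same topic -> [comma-separated word groups] shape as hot.json.
custom_hot_words = {
    "华为": ["华为", "问界", "麒麟"],
    "人工智能": ["人工智能,AI", "算力"],
}

entities = get_entities(provider="em", entity_type="stock", return_type="domain")

# threshold=1: a single keyword hit within the days_ago window is enough to
# attach a stock to a word group.
print(
    msg_group_stocks_by_topic(
        entities=entities,
        hot_words_config=custom_hot_words,
        days_ago=30,
        threshold=1,
    )
)

This mirrors the call inform() now makes for stock reports, msg_group_stocks_by_topic(entities=entities, threshold=1, days_ago=60), except that the word config is passed explicitly instead of falling back to get_hot_words_config().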
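
The real_period loop added to get_top_performance_entities_by_periods stretches each requested period until its window both contains trade days and starts on a different day than the previous window, so overlapping periods are retried with a wider window instead of being skipped as before. A self-contained illustration of that idea, using an invented fake_trade_dates stand-in (weekdays only) rather than zvt's real get_trade_dates, with the two retry branches condensed into one condition:

from datetime import date, timedelta


def fake_trade_dates(start: date, end: date):
    # Stand-in for zvt's get_trade_dates: pretend every weekday is a trade day.
    days, d = [], start
    while d <= end:
        if d.weekday() < 5:
            days.append(d)
        d += timedelta(days=1)
    return days


target_date = date(2023, 10, 8)
periods = [7, 8, 30]  # 7 and 8 calendar days back would start on the same Monday
current_start = None
real_period = 1
for period in periods:
    real_period = max(real_period, period)
    while True:
        trade_days = fake_trade_dates(target_date - timedelta(days=real_period), target_date)
        if not trade_days or (current_start and current_start == trade_days[0]):
            real_period += 1  # widen the window until it is non-empty and distinct
            continue
        break
    current_start = trade_days[0]
    print(f"period={period} -> window starts {current_start}, real_period={real_period}")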