From 981ba2aaf0f215db6cbe36df1c8c33e7aa830b9c Mon Sep 17 00:00:00 2001
From: foolcage <5533061@qq.com>
Date: Sun, 8 Oct 2023 17:29:27 +0800
Subject: [PATCH] Report with big picture

---
 examples/hot.json               | 43 +++++++++++----
 examples/report_utils.py        | 28 +++++++---
 examples/reports/report_tops.py | 14 +++--
 examples/utils.py               | 93 +++++++++++++++++++++++----------
 src/zvt/api/stats.py            | 24 ++++++---
 5 files changed, 145 insertions(+), 57 deletions(-)

diff --git a/examples/hot.json b/examples/hot.json
index d3427dbf..563d00ac 100644
--- a/examples/hot.json
+++ b/examples/hot.json
@@ -1,7 +1,29 @@
 {
+  "减肥药": [
+    "减肥药"
+  ],
+  "房地产": [
+    "房地产",
+    "新型城镇化",
+    "棚改",
+    "建材"
+  ],
+  "新型工业化": [
+    "新型工业化",
+    "工业母机"
+  ],
+  "华为": [
+    "华为",
+    "mate60 pro,mate",
+    "星闪",
+    "问界",
+    "麒麟",
+    "昇腾",
+    "鸿蒙"
+  ],
   "新能源": [
     "新能源",
-    "锂电 锂电池",
+    "锂电,锂电池",
     "钠离子电池",
     "光伏",
     "太阳能",
@@ -11,34 +33,35 @@
     "核电"
   ],
   "新能车": [
-    "新能车 新能源汽车",
-    "整车 汽车整车",
-    "汽车零部件 汽车零件",
+    "新能车,新能源汽车",
+    "整车,汽车整车",
+    "汽车零部件,汽车零件",
     "无人驾驶",
-    "压铸一体化 一体化压铸"
+    "压铸一体化,一体化压铸"
   ],
   "人工智能": [
-    "人工智能 AI",
-    "GPT CHATGPT",
+    "人工智能,AI",
+    "GPT,CHATGPT",
     "算力"
   ],
   "机器人": [
     "机器人",
     "减速器",
-    "伺服 伺服系统",
+    "伺服,伺服系统",
     "控制系统",
     "电机"
   ],
   "核心资产": [
     "核心资产",
+    "消费,白酒,食品,饮料",
     "白马",
     "沪深300",
     "基金重仓",
     "上证50"
   ],
-  "人民币国际化": [
-    "人民币国际化",
+  "一带一路": [
     "一带一路",
+    "人民币国际化",
     "跨境支付"
   ]
 }
diff --git a/examples/report_utils.py b/examples/report_utils.py
index 8b66594c..d23a1dca 100644
--- a/examples/report_utils.py
+++ b/examples/report_utils.py
@@ -3,7 +3,7 @@
 import time
 from typing import Type
 
-from examples.utils import add_to_eastmoney
+from examples.utils import add_to_eastmoney, group_stocks_by_topic, msg_group_stocks_by_topic
 from zvt import zvt_config
 from zvt.api import get_top_volume_entities, TopType
 from zvt.api.kdata import get_latest_kdata_date, get_kdata_schema, default_adjust_type
@@ -18,7 +18,15 @@
 
 
 def inform(
-    action: EmailInformer, entity_ids, target_date, title, entity_provider, entity_type, em_group, em_group_over_write
+    action: EmailInformer,
+    entity_ids,
+    target_date,
+    title,
+    entity_provider,
+    entity_type,
+    em_group,
+    em_group_over_write,
+    group_by_topic=True,
 ):
     msg = "no targets"
     if entity_ids:
@@ -36,8 +44,12 @@ def inform(
                 f"{target_date} {title} error: {e}",
             )
 
-        infos = [f"{entity.name}({entity.code})" for entity in entities]
-        msg = "\n".join(infos) + "\n"
+        if group_by_topic and (entity_type == "stock"):
+            msg = msg_group_stocks_by_topic(entities=entities, threshold=1, days_ago=60)
+        else:
+            infos = [f"{entity.name}({entity.code})" for entity in entities]
+            msg = "\n".join(infos) + "\n"
+
 
     logger.info(msg)
     action.send_message(zvt_config["email_username"], f"{target_date} {title}", msg)
@@ -126,7 +138,7 @@ def report_targets(
             informer,
             entity_ids=long_stocks,
             target_date=target_date,
-            title=title,
+            title=f"{entity_type} {title}({len(long_stocks)})",
             entity_provider=entity_provider,
             entity_type=entity_type,
             em_group=em_group,
@@ -159,12 +171,16 @@ def report_top_entities(
     turnover_threshold=100000000,
     turnover_rate_threshold=0.02,
     informer: EmailInformer = None,
+    title="最强",
     em_group=None,
     em_group_over_write=True,
     return_type=TopType.positive,
 ):
     error_count = 0
 
+    if not adjust_type:
+        adjust_type = default_adjust_type(entity_type=entity_type)
+
     while error_count <= 10:
         try:
             target_date = get_latest_kdata_date(
@@ -190,7 +206,7 @@ def report_top_entities(
             informer,
             entity_ids=selected,
             target_date=target_date,
-            title=f"{entity_type} {em_group}({len(selected)})",
+            title=f"{entity_type} {title}({len(selected)})",
             entity_provider=entity_provider,
             entity_type=entity_type,
             em_group=em_group,
diff --git a/examples/reports/report_tops.py b/examples/reports/report_tops.py
index 604572c7..d148656c 100644
--- a/examples/reports/report_tops.py
+++ b/examples/reports/report_tops.py
@@ -31,6 +31,7 @@ def report_top_stocks():
         turnover_threshold=0,
         turnover_rate_threshold=0,
         informer=email_informer,
+        title="短期最强",
         em_group="短期最强",
         em_group_over_write=True,
         return_type=TopType.positive,
@@ -48,6 +49,7 @@ def report_top_stocks():
         turnover_threshold=0,
         turnover_rate_threshold=0,
         informer=email_informer,
+        title="中期最强",
         em_group="中期最强",
         em_group_over_write=True,
         return_type=TopType.positive,
@@ -89,6 +91,7 @@ def report_top_blocks():
         turnover_rate_threshold=0,
         informer=email_informer,
         em_group="最强行业",
+        title="最强行业",
         em_group_over_write=True,
         return_type=TopType.positive,
         entity_ids=entity_ids,
@@ -110,6 +113,7 @@ def report_top_blocks():
         turnover_rate_threshold=0,
         informer=email_informer,
         em_group="最强概念",
+        title="最强概念",
         em_group_over_write=True,
         return_type=TopType.positive,
         entity_ids=entity_ids,
@@ -123,14 +127,15 @@ def report_top_stockhks():
         entity_provider="em",
         data_provider="em",
         top_count=10,
-        periods=[*range(2, 27)],
+        periods=[*range(1, 15)],
         ignore_new_stock=False,
         ignore_st=False,
         adjust_type=None,
         turnover_threshold=30000000,
-        turnover_rate_threshold=0.001,
+        turnover_rate_threshold=0.01,
         informer=email_informer,
         em_group="短期最强",
+        title="短期最强",
         em_group_over_write=False,
         return_type=TopType.positive,
     )
@@ -140,14 +145,15 @@ def report_top_stockhks():
         entity_provider="em",
         data_provider="em",
         top_count=10,
-        periods=[30, 60],
+        periods=[30, 50],
         ignore_new_stock=True,
         ignore_st=False,
         adjust_type=None,
         turnover_threshold=30000000,
-        turnover_rate_threshold=0.001,
+        turnover_rate_threshold=0.01,
         informer=email_informer,
         em_group="中期最强",
+        title="中期最强",
         em_group_over_write=False,
         return_type=TopType.positive,
     )
diff --git a/examples/utils.py b/examples/utils.py
index e43f3efb..9923fdc4 100644
--- a/examples/utils.py
+++ b/examples/utils.py
@@ -9,7 +9,7 @@
 
 from zvt.api.stats import get_top_performance_entities_by_periods
 from zvt.contract.api import get_entities
-from zvt.domain import StockNews
+from zvt.domain import StockNews, Stock
 from zvt.utils import next_date, today
 
 logger = logging.getLogger(__name__)
@@ -53,17 +53,41 @@ def hot_stats(data: pd.Series):
     pass
 
 
-def group_stocks_by_topic(entities, start_timestamp=None):
-    # 默认半年内的新闻
+def group_stocks_by_topic(
+    keyword=None, entities=None, hot_words_config=None, start_timestamp=None, days_ago=60, threshold=3
+):
+    """
+    Group stocks by hot topics mined from their recent news titles.
+    :param keyword: optional keyword; only news whose title contains it is considered
+    :param entities: stock entities to group; defaults to all stocks from provider "em"
+    :param hot_words_config: two-level structure of topic -> [word group 1, word group 2, ...],
+        e.g. a valid item is {"华为": ["华为", "mate pro", "星闪", "问界"]}
+    :param start_timestamp: earliest news timestamp; defaults to days_ago days before today
+    :param days_ago: look-back window in days, used when start_timestamp is not given
+    :param threshold: minimum keyword hit count for a stock to be attached to a word group
+    :return: list of (topic, [(word group, [(stock, count), ...]), ...]) tuples
+    """
     if not start_timestamp:
-        start_timestamp = next_date(today(), -180)
+        start_timestamp = next_date(today(), -days_ago)
     stock_map = {}
+
+    entity_ids = None
+    if entities:
+        entity_ids = [entity.entity_id for entity in entities]
+    else:
+        entities = Stock.query_data(provider="em", return_type="domain")
+
     for entity in entities:
         stock_map[entity.entity_id] = {"code": entity.code, "name": entity.name}
-    df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=[entity.entity_id for entity in entities])
+
+    filters = None
+    if keyword:
+        filters = [StockNews.news_title.contains(keyword)]
+    df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=entity_ids, filters=filters)
     df = df.groupby("entity_id")["news_title"].apply(",".join).reset_index()
-    hot_words_config = get_hot_words_config()
+    if not hot_words_config:
+        hot_words_config = get_hot_words_config()
     hot_stocks_map = {}
     topic_count = {}
@@ -73,22 +97,21 @@ def group_stocks_by_topic(entities, start_timestamp=None):
         text = row["news_title"]
         is_hot = False
 
-        # hot words config为二重结构
-        # 即 主题:[分支1,分支2,...]的形式
         for topic in hot_words_config:
             topic_count.setdefault(topic, 0)
             for words in hot_words_config[topic]:
                 hot_stocks_map.setdefault(words, [])
                 word_count.setdefault(words, 0)
-                for word in words.split():
-                    count = text.count(word)
-                    if count > 0:
-                        word_count[words] = word_count[words] + 1
-                        topic_count[topic] = topic_count[topic] + 1
-                        hot_stocks_map[words].append(
-                            (f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count)
-                        )
-                        is_hot = True
+                count = 0
+                for word in words.split(","):
+                    count = text.count(word) + count
+                if count >= threshold:
+                    word_count[words] = word_count[words] + 1
+                    topic_count[topic] = topic_count[topic] + 1
+                    hot_stocks_map[words].append(
+                        (f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count)
+                    )
+                    is_hot = True
         if not is_hot:
             hot_stocks_map.setdefault("其他", [])
             hot_stocks_map["其他"].append((f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", 0))
@@ -106,24 +129,36 @@ def group_stocks_by_topic(entities, start_timestamp=None):
         ]
         result.append((f"{topic}({count})", topic_words_stocks))
 
-    result.append(("其他", [("其他", hot_stocks_map["其他"])]))
+    result.append(("其他", [("其他", hot_stocks_map.get("其他", ""))]))
     return result
 
 
-if __name__ == "__main__":
-    ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")
-
-    entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain")
-
-    group_info = group_stocks_by_topic(entities=entities)
-    info = ""
+def msg_group_stocks_by_topic(
+    keyword=None, entities=None, hot_words_config=None, start_timestamp=None, days_ago=60, threshold=3
+):
+    group_info = group_stocks_by_topic(
+        keyword=keyword,
+        entities=entities,
+        hot_words_config=hot_words_config,
+        start_timestamp=start_timestamp,
+        days_ago=days_ago,
+        threshold=threshold,
+    )
+    msg = ""
     for group in group_info:
         topic = group[0]
-        info = info + f"^^^^^^ {topic} ^^^^^^\n"
+        msg = msg + f"^^^^^^ {topic} ^^^^^^\n"
         for topic_word, stocks_count in group[1]:
-            info = info + f"{topic_word}\n"
+            msg = msg + f"{topic_word}\n"
             stocks = [f"{stock_count[0]} {stock_count[1]}" for stock_count in stocks_count]
-            info = info + "\n".join(stocks) + "\n"
+            msg = msg + "\n".join(stocks) + "\n"
+    return msg
+
+
+if __name__ == "__main__":
+    ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")
+
+    entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain")
 
-    print(info)
+    print(msg_group_stocks_by_topic(entities=entities, threshold=1))
diff --git a/src/zvt/api/stats.py b/src/zvt/api/stats.py
index 85f9190c..66ff4022 100644
--- a/src/zvt/api/stats.py
+++ b/src/zvt/api/stats.py
@@ -113,17 +113,25 @@ def get_top_performance_entities_by_periods(
         filters = [kdata_schema.entity_id.in_(filter_entity_ids)]
     selected = []
     current_start = None
+    real_period = 1
     for i, period in enumerate(periods):
-        start = next_date(target_date, -period)
-        trade_days = get_trade_dates(start=next_date(target_date, -period), end=target_date)
-        if not trade_days:
-            logger.info(f"no trade days in: {start} to {target_date}")
-            continue
-        if current_start and is_same_date(current_start, trade_days[0]):
-            logger.info("ignore same trade days")
-            continue
+        real_period = max(real_period, period)
+        while True:
+            start = next_date(target_date, -real_period)
+            trade_days = get_trade_dates(start=start, end=target_date)
+            if not trade_days:
+                logger.info(f"no trade days in: {start} to {target_date}")
+                real_period = real_period + 1
+                continue
+            if current_start and is_same_date(current_start, trade_days[0]):
+                logger.info("ignore same trade days")
+                real_period = real_period + 1
+                continue
+            break
         current_start = trade_days[0]
         current_end = trade_days[-1]
+
+        logger.info(f"trade days in: {current_start} to {current_end}, real_period: {real_period} ")
         positive_df, negative_df = get_top_performance_entities(
             entity_type=entity_type,
             start_timestamp=current_start,
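
The grouping helpers this patch adds to examples/utils.py can also be exercised outside the report jobs. Below is a minimal sketch, assuming the patched examples package is importable and StockNews data for provider "em" has been recorded locally; custom_hot_words is a hypothetical config used only to illustrate the topic -> ["word,word", ...] shape that hot.json follows, not something shipped by this change.

# Sketch: build the topic-grouped "big picture" message for an ad-hoc stock list.
# Assumption: zvt is installed and em StockNews data is already recorded locally.
from examples.utils import msg_group_stocks_by_topic
from zvt.contract.api import get_entities

# Hypothetical config in the same topic -> [comma-separated word groups] shape as hot.json.
custom_hot_words = {
    "华为": ["华为", "问界", "麒麟"],
    "人工智能": ["人工智能,AI", "算力"],
}

entities = get_entities(provider="em", entity_type="stock", return_type="domain")

# threshold=1: a single keyword hit within the days_ago window is enough to
# attach a stock to a word group.
print(
    msg_group_stocks_by_topic(
        entities=entities,
        hot_words_config=custom_hot_words,
        days_ago=30,
        threshold=1,
    )
)

This mirrors the call inform() now makes for stock reports, msg_group_stocks_by_topic(entities=entities, threshold=1, days_ago=60), except that the word config is passed explicitly instead of falling back to get_hot_words_config().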
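
The real_period loop added to get_top_performance_entities_by_periods stretches each requested period until its window both contains trade days and starts on a different day than the previous window, so overlapping periods are retried with a wider window instead of being skipped as before. A self-contained illustration of that idea, using an invented fake_trade_dates stand-in (weekdays only) rather than zvt's real get_trade_dates, with the two retry branches condensed into one condition:

from datetime import date, timedelta


def fake_trade_dates(start: date, end: date):
    # Stand-in for zvt's get_trade_dates: pretend every weekday is a trade day.
    days, d = [], start
    while d <= end:
        if d.weekday() < 5:
            days.append(d)
        d += timedelta(days=1)
    return days


target_date = date(2023, 10, 8)
periods = [7, 8, 30]  # 7 and 8 calendar days back would start on the same Monday
current_start = None
real_period = 1
for period in periods:
    real_period = max(real_period, period)
    while True:
        trade_days = fake_trade_dates(target_date - timedelta(days=real_period), target_date)
        if not trade_days or (current_start and current_start == trade_days[0]):
            real_period += 1  # widen the window until it is non-empty and distinct
            continue
        break
    current_start = trade_days[0]
    print(f"period={period} -> window starts {current_start}, real_period={real_period}")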