Skip to content

Commit

Permalink
Report with big picture
Browse files Browse the repository at this point in the history
  • Loading branch information
foolcage committed Oct 8, 2023
1 parent 8fa5d59 commit 981ba2a
Show file tree
Hide file tree
Showing 5 changed files with 145 additions and 57 deletions.
43 changes: 33 additions & 10 deletions examples/hot.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,29 @@
{
"减肥药": [
"减肥药"
],
"房地产": [
"房地产",
"新型城镇化",
"棚改",
"建材"
],
"新型工业化": [
"新型工业化",
"工业母机"
],
"华为": [
"华为",
"mate60 pro,mate",
"星闪",
"问界",
"麒麟",
"昇腾",
"鸿蒙"
],
"新能源": [
"新能源",
"锂电 锂电池",
"锂电,锂电池",
"钠离子电池",
"光伏",
"太阳能",
Expand All @@ -11,34 +33,35 @@
"核电"
],
"新能车": [
"新能车 新能源汽车",
"整车 汽车整车",
"汽车零部件 汽车零件",
"新能车,新能源汽车",
"整车,汽车整车",
"汽车零部件,汽车零件",
"无人驾驶",
"压铸一体化 一体化压铸"
"压铸一体化,一体化压铸"
],
"人工智能": [
"人工智能 AI",
"GPT CHATGPT",
"人工智能,AI",
"GPT,CHATGPT",
"算力"
],
"机器人": [
"机器人",
"减速器",
"伺服 伺服系统",
"伺服,伺服系统",
"控制系统",
"电机"
],
"核心资产": [
"核心资产",
"消费,白酒,食品,饮料",
"白马",
"沪深300",
"基金重仓",
"上证50"
],
"人民币国际化": [
"人民币国际化",
"一带一路": [
"一带一路",
"人民币国际化",
"跨境支付"
]
}
28 changes: 22 additions & 6 deletions examples/report_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import time
from typing import Type

from examples.utils import add_to_eastmoney
from examples.utils import add_to_eastmoney, group_stocks_by_topic, msg_group_stocks_by_topic
from zvt import zvt_config
from zvt.api import get_top_volume_entities, TopType
from zvt.api.kdata import get_latest_kdata_date, get_kdata_schema, default_adjust_type
Expand All @@ -18,7 +18,15 @@


def inform(
action: EmailInformer, entity_ids, target_date, title, entity_provider, entity_type, em_group, em_group_over_write
action: EmailInformer,
entity_ids,
target_date,
title,
entity_provider,
entity_type,
em_group,
em_group_over_write,
group_by_topic=True,
):
msg = "no targets"
if entity_ids:
Expand All @@ -36,8 +44,12 @@ def inform(
f"{target_date} {title} error: {e}",
)

infos = [f"{entity.name}({entity.code})" for entity in entities]
msg = "\n".join(infos) + "\n"
if group_by_topic and (entity_type == "stock"):
msg = msg_group_stocks_by_topic(entities=entities, threshold=1, days_ago=60)
else:
infos = [f"{entity.name}({entity.code})" for entity in entities]
msg = "\n".join(infos) + "\n"

logger.info(msg)
action.send_message(zvt_config["email_username"], f"{target_date} {title}", msg)

Expand Down Expand Up @@ -126,7 +138,7 @@ def report_targets(
informer,
entity_ids=long_stocks,
target_date=target_date,
title=title,
title=f"{entity_type} {title}({len(long_stocks)})",
entity_provider=entity_provider,
entity_type=entity_type,
em_group=em_group,
Expand Down Expand Up @@ -159,12 +171,16 @@ def report_top_entities(
turnover_threshold=100000000,
turnover_rate_threshold=0.02,
informer: EmailInformer = None,
title="最强",
em_group=None,
em_group_over_write=True,
return_type=TopType.positive,
):
error_count = 0

if not adjust_type:
adjust_type = default_adjust_type(entity_type=entity_type)

while error_count <= 10:
try:
target_date = get_latest_kdata_date(
Expand All @@ -190,7 +206,7 @@ def report_top_entities(
informer,
entity_ids=selected,
target_date=target_date,
title=f"{entity_type} {em_group}({len(selected)})",
title=f"{entity_type} {title}({len(selected)})",
entity_provider=entity_provider,
entity_type=entity_type,
em_group=em_group,
Expand Down
14 changes: 10 additions & 4 deletions examples/reports/report_tops.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def report_top_stocks():
turnover_threshold=0,
turnover_rate_threshold=0,
informer=email_informer,
title="短期最强",
em_group="短期最强",
em_group_over_write=True,
return_type=TopType.positive,
Expand All @@ -48,6 +49,7 @@ def report_top_stocks():
turnover_threshold=0,
turnover_rate_threshold=0,
informer=email_informer,
title="中期最强",
em_group="中期最强",
em_group_over_write=True,
return_type=TopType.positive,
Expand Down Expand Up @@ -89,6 +91,7 @@ def report_top_blocks():
turnover_rate_threshold=0,
informer=email_informer,
em_group="最强行业",
title="最强行业",
em_group_over_write=True,
return_type=TopType.positive,
entity_ids=entity_ids,
Expand All @@ -110,6 +113,7 @@ def report_top_blocks():
turnover_rate_threshold=0,
informer=email_informer,
em_group="最强概念",
title="最强概念",
em_group_over_write=True,
return_type=TopType.positive,
entity_ids=entity_ids,
Expand All @@ -123,14 +127,15 @@ def report_top_stockhks():
entity_provider="em",
data_provider="em",
top_count=10,
periods=[*range(2, 27)],
periods=[*range(1, 15)],
ignore_new_stock=False,
ignore_st=False,
adjust_type=None,
turnover_threshold=30000000,
turnover_rate_threshold=0.001,
turnover_rate_threshold=0.01,
informer=email_informer,
em_group="短期最强",
title="短期最强",
em_group_over_write=False,
return_type=TopType.positive,
)
Expand All @@ -140,14 +145,15 @@ def report_top_stockhks():
entity_provider="em",
data_provider="em",
top_count=10,
periods=[30, 60],
periods=[30, 50],
ignore_new_stock=True,
ignore_st=False,
adjust_type=None,
turnover_threshold=30000000,
turnover_rate_threshold=0.001,
turnover_rate_threshold=0.01,
informer=email_informer,
em_group="中期最强",
title="中期最强",
em_group_over_write=False,
return_type=TopType.positive,
)
Expand Down
93 changes: 64 additions & 29 deletions examples/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from zvt.api.stats import get_top_performance_entities_by_periods
from zvt.contract.api import get_entities
from zvt.domain import StockNews
from zvt.domain import StockNews, Stock
from zvt.utils import next_date, today

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -53,17 +53,41 @@ def hot_stats(data: pd.Series):
pass


def group_stocks_by_topic(entities, start_timestamp=None):
# 默认半年内的新闻
def group_stocks_by_topic(
keyword=None, entities=None, hot_words_config=None, start_timestamp=None, days_ago=60, threshold=3
):
"""
:param keyword:
:param entities:
:param hot_words_config: hot words config为二重结构,即 主题:[分支1,分支2,...]的形式
比如一个有效的item:{"华为":["华为", "mate pro", "星闪", "问界"]}
:param start_timestamp:
:param days_ago:
:param threshold:
:return:
"""
if not start_timestamp:
start_timestamp = next_date(today(), -180)
start_timestamp = next_date(today(), -days_ago)
stock_map = {}

entity_ids = None
if entities:
entity_ids = [entity.entity_id for entity in entities]
else:
entities = Stock.query_data(provider="em", return_type="domain")

for entity in entities:
stock_map[entity.entity_id] = {"code": entity.code, "name": entity.name}
df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=[entity.entity_id for entity in entities])

filters = None
if keyword:
filters = [StockNews.news_title.contains(keyword)]
df = StockNews.query_data(start_timestamp=start_timestamp, entity_ids=entity_ids, filters=filters)
df = df.groupby("entity_id")["news_title"].apply(",".join).reset_index()

hot_words_config = get_hot_words_config()
if not hot_words_config:
hot_words_config = get_hot_words_config()

hot_stocks_map = {}
topic_count = {}
Expand All @@ -73,22 +97,21 @@ def group_stocks_by_topic(entities, start_timestamp=None):
text = row["news_title"]

is_hot = False
# hot words config为二重结构
# 即 主题:[分支1,分支2,...]的形式
for topic in hot_words_config:
topic_count.setdefault(topic, 0)
for words in hot_words_config[topic]:
hot_stocks_map.setdefault(words, [])
word_count.setdefault(words, 0)
for word in words.split():
count = text.count(word)
if count > 0:
word_count[words] = word_count[words] + 1
topic_count[topic] = topic_count[topic] + 1
hot_stocks_map[words].append(
(f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count)
)
is_hot = True
count = 0
for word in words.split(","):
count = text.count(word) + count
if count >= threshold:
word_count[words] = word_count[words] + 1
topic_count[topic] = topic_count[topic] + 1
hot_stocks_map[words].append(
(f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", count)
)
is_hot = True
if not is_hot:
hot_stocks_map.setdefault("其他", [])
hot_stocks_map["其他"].append((f"{stock_map[entity_id]['code']}({stock_map[entity_id]['name']})", 0))
Expand All @@ -106,24 +129,36 @@ def group_stocks_by_topic(entities, start_timestamp=None):
]
result.append((f"{topic}({count})", topic_words_stocks))

result.append(("其他", [("其他", hot_stocks_map["其他"])]))
result.append(("其他", [("其他", hot_stocks_map.get("其他", ""))]))

return result


if __name__ == "__main__":
ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")

entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain")

group_info = group_stocks_by_topic(entities=entities)
info = ""
def msg_group_stocks_by_topic(
    keyword=None, entities=None, hot_words_config=None, start_timestamp=None, days_ago=60, threshold=3
):
    """Render the result of ``group_stocks_by_topic`` as a plain-text report.

    Every argument is forwarded unchanged to ``group_stocks_by_topic``; see that
    function for their meaning.

    :param keyword: forwarded to ``group_stocks_by_topic``
    :param entities: forwarded to ``group_stocks_by_topic``
    :param hot_words_config: forwarded to ``group_stocks_by_topic``
    :param start_timestamp: forwarded to ``group_stocks_by_topic``
    :param days_ago: forwarded to ``group_stocks_by_topic``
    :param threshold: forwarded to ``group_stocks_by_topic``
    :return: one string; for each group a ``^^^^^^ topic ^^^^^^`` header line,
        then each topic word on its own line followed by one
        ``"<stock> <count>"`` line per stock
    """
    group_info = group_stocks_by_topic(
        keyword=keyword,
        entities=entities,
        hot_words_config=hot_words_config,
        start_timestamp=start_timestamp,
        days_ago=days_ago,
        threshold=threshold,
    )

    # Collect the pieces and join once instead of growing a string in a loop.
    parts = []
    for topic, topic_words_stocks in group_info:
        parts.append(f"^^^^^^ {topic} ^^^^^^\n")
        for topic_word, stocks_count in topic_words_stocks:
            parts.append(f"{topic_word}\n")
            stocks = [f"{stock_count[0]} {stock_count[1]}" for stock_count in stocks_count]
            # The trailing newline is emitted even when there are no stocks,
            # so an empty topic word is still followed by a blank line.
            parts.append("\n".join(stocks) + "\n")
    return "".join(parts)


if __name__ == "__main__":
    # Manual smoke run: take the ids returned by
    # get_top_performance_entities_by_periods for the "em" provider, load them
    # as stock domain objects and print the topic-grouped report.
    ids = get_top_performance_entities_by_periods(entity_provider="em", data_provider="em")

    entities = get_entities(provider="em", entity_type="stock", entity_ids=ids, return_type="domain")

    # threshold=1: a single keyword hit is enough to list a stock under a topic word.
    print(msg_group_stocks_by_topic(entities=entities, threshold=1))
24 changes: 16 additions & 8 deletions src/zvt/api/stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,17 +113,25 @@ def get_top_performance_entities_by_periods(
filters = [kdata_schema.entity_id.in_(filter_entity_ids)]
selected = []
current_start = None
real_period = 1
for i, period in enumerate(periods):
start = next_date(target_date, -period)
trade_days = get_trade_dates(start=next_date(target_date, -period), end=target_date)
if not trade_days:
logger.info(f"no trade days in: {start} to {target_date}")
continue
if current_start and is_same_date(current_start, trade_days[0]):
logger.info("ignore same trade days")
continue
real_period = max(real_period, period)
while True:
start = next_date(target_date, -real_period)
trade_days = get_trade_dates(start=start, end=target_date)
if not trade_days:
logger.info(f"no trade days in: {start} to {target_date}")
real_period = real_period + 1
continue
if current_start and is_same_date(current_start, trade_days[0]):
logger.info("ignore same trade days")
real_period = real_period + 1
continue
break
current_start = trade_days[0]
current_end = trade_days[-1]

logger.info(f"trade days in: {current_start} to {current_end}, real_period: {real_period} ")
positive_df, negative_df = get_top_performance_entities(
entity_type=entity_type,
start_timestamp=current_start,
Expand Down

0 comments on commit 981ba2a

Please sign in to comment.