diff --git a/examples/utils.py b/examples/utils.py index c58748e3..e43f3efb 100644 --- a/examples/utils.py +++ b/examples/utils.py @@ -54,6 +54,7 @@ def hot_stats(data: pd.Series): def group_stocks_by_topic(entities, start_timestamp=None): + # 默认半年内的新闻 if not start_timestamp: start_timestamp = next_date(today(), -180) stock_map = {} @@ -72,6 +73,8 @@ def group_stocks_by_topic(entities, start_timestamp=None): text = row["news_title"] is_hot = False + # hot words config为二重结构 + # 即 主题:[分支1,分支2,...]的形式 for topic in hot_words_config: topic_count.setdefault(topic, 0) for words in hot_words_config[topic]: diff --git a/src/zvt/recorders/eastmoney/meta/eastmoney_block_meta_recorder.py b/src/zvt/recorders/eastmoney/meta/eastmoney_block_meta_recorder.py index d2ef3b7c..6c4e5d0f 100644 --- a/src/zvt/recorders/eastmoney/meta/eastmoney_block_meta_recorder.py +++ b/src/zvt/recorders/eastmoney/meta/eastmoney_block_meta_recorder.py @@ -97,9 +97,9 @@ def record(self, entity, start, end, size, timestamps): if __name__ == "__main__": # init_log('china_stock_category.log') - # EastmoneyBlockRecorder().run() + EastmoneyBlockRecorder().run() - recorder = EastmoneyBlockStockRecorder() + recorder = EastmoneyBlockStockRecorder(code="BK1144") recorder.run() # the __all__ is generated __all__ = ["EastmoneyBlockRecorder", "EastmoneyBlockStockRecorder"] diff --git a/src/zvt/recorders/em/em_api.py b/src/zvt/recorders/em/em_api.py index 645bdf3b..2daed84b 100644 --- a/src/zvt/recorders/em/em_api.py +++ b/src/zvt/recorders/em/em_api.py @@ -7,12 +7,20 @@ import pandas as pd import requests -from zvt.api import generate_kdata_id, value_to_pct +from zvt.api import generate_kdata_id, value_to_pct, china_stock_code_to_id from zvt.contract import ActorType, AdjustType, IntervalLevel, Exchange, TradableType, get_entity_exchanges from zvt.contract.api import decode_entity_id from zvt.domain import BlockCategory from zvt.recorders.consts import DEFAULT_HEADER -from zvt.utils import to_pd_timestamp, 
def get_block_stocks(block_id, name=""):
    """Fetch the constituent stocks of one Eastmoney block.

    :param block_id: entity id of the block, e.g. ``"block_cn_BK1144"``; it is
        split by ``decode_entity_id`` into entity type, exchange and code.
    :param name: block name copied verbatim into every returned record.
    :return: list of dicts, one per constituent stock, with keys ``id``,
        ``entity_id``, ``entity_type``, ``exchange``, ``code``, ``name``,
        ``timestamp``, ``stock_id``, ``stock_code`` and ``stock_name``. An empty
        list is returned when the response carries no constituents.
    """
    # Only exchange and code are needed; the decoded entity type is not used.
    _, exchange, code = decode_entity_id(block_id)
    category_stocks_url = f"http://48.push2.eastmoney.com/api/qt/clist/get?cb=jQuery11240710111145777397_{now_timestamp() - 1}&pn=1&pz=1000&po=1&np=1&ut=bd1d9ddb04089700cf9c27f6f7426281&fltt=2&invt=2&wbp2u=4668014655929990|0|1|0|web&fid=f3&fs=b:{code}+f:!50&fields=f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152,f45&_={now_timestamp()}"
    resp = requests.get(category_stocks_url, headers=DEFAULT_HEADER)

    # NOTE(review): the endpoint appears to answer with a null "data" member
    # when a block has no constituents -- confirm against the live API. Guard
    # against it so an empty block yields [] instead of a TypeError/KeyError.
    data = json_callback_param(resp.text)["data"]
    if not data:
        return []
    results = data.get("diff") or []

    # Evaluate the date once so every record of this call shares one timestamp.
    timestamp = current_date()

    the_list = []
    for result in results:
        # f12 / f14 are read as the stock code / stock name of each row.
        stock_code = result["f12"]
        stock_name = result["f14"]
        stock_id = china_stock_code_to_id(stock_code)

        the_list.append(
            {
                "id": f"{block_id}_{stock_id}",
                "entity_id": block_id,
                "entity_type": "block",
                "exchange": exchange,
                "code": code,
                "name": name,
                "timestamp": timestamp,
                "stock_id": stock_id,
                "stock_code": stock_code,
                "stock_name": stock_name,
            }
        )
    return the_list
class EMBlockStockRecorder(TimeSeriesDataRecorder):
    """Recorder that stores the constituent stocks of each ``Block`` entity.

    Entities are ``Block`` rows from the "em" provider; the records written
    use the ``BlockStock`` schema under the same provider.
    """

    # Where the block entities to iterate over come from.
    entity_provider = "em"
    entity_schema = Block

    # Where the block -> stock records are written.
    provider = "em"
    data_schema = BlockStock

    def record(self, entity, start, end, size, timestamps):
        """Fetch and persist the constituents of one block.

        ``start``, ``end``, ``size`` and ``timestamps`` are part of the
        recorder callback signature and are not used by this method.
        """
        stock_rows = em_api.get_block_stocks(entity.id, entity.name)

        # Nothing is written when the block has no constituents.
        if stock_rows:
            df_to_db(
                data_schema=self.data_schema,
                df=pd.DataFrame.from_records(stock_rows),
                provider=self.provider,
                force_update=True,
            )

        self.logger.info("finish recording block:{},{}".format(entity.category, entity.name))
        self.sleep()