from jqdatasdk import *
import seaborn as sns
import math
import datetime
from tqdm import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
1.1、财务数据——总市值和PE
def load_fundamentals_data(context):
'''
加载股票的财务数据,包括总市值和PE
'''
df = get_fundamentals(query(valuation,indicator), context.current_dt.strftime("%Y-%m-%d"))
raw_data = []
for index in range(len(df['code'])):
raw_data_item = {
'code' :df['code'][index],
'market_cap':df['market_cap'][index],
'pe_ratio' :df['pe_ratio'][index]
}
raw_data.append(raw_data_item)
return raw_data
df=get_fundamentals(query(valuation))
df = get_fundamentals(query(valuation,indicator))
raw_data = []
for index in range(len(df['code'])):
raw_data_item = {
'code' :df['code'][index],
'market_cap':df['market_cap'][index],
'pe_ratio' :df['pe_ratio'][index]
}
raw_data.append(raw_data_item)
# 按照财务信息中的总市值降序排序
raw_data = sorted(raw_data,key = lambda item:item['market_cap'], reverse=True)
# 剔除总市值排名最小的10%的股票
fitered_market_cap = raw_data[:int(len(raw_data) * 0.9)]
# 剔除PE TTM 小于0或大于100
filtered_pe = []
for stock in fitered_market_cap:
if stock['pe_ratio'] == None or math.isnan(stock['pe_ratio']) or float(stock['pe_ratio']) < 0 or float(stock['pe_ratio']) > 100:
continue
filtered_pe.append(stock['code'])
#PE数据集
df_pe=pd.DataFrame(fitered_market_cap)
1.2、行情数据——25日涨跌幅
current_dt=datetime.datetime.now().strftime('%Y-%m-%d')
CHANGE_PCT_DAY_NUMBER=25
def load_change_pct_data(current_dt,codes):
change_pct_dict_list = []
# 计算涨跌幅需要用到前一日收盘价,所以需要多加载一天的数据,
# 而这里在第二日的开盘前运行,计算前一个交易日收盘后的数据,所以需要再多加载一天
# 使用固定的25个交易日,而非25个bar计算涨跌幅
count = CHANGE_PCT_DAY_NUMBER + 1
# 获取25个交易日的日期
pre_25_dates = get_trade_days(start_date=None, end_date=current_dt, count=count)
pre_25_date = pre_25_dates[0]
pre_1_date = pre_25_dates[-1]
for code in codes:
pre_25_data = get_price(code, start_date=None, end_date=pre_25_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_1_data = get_price(code, start_date=None, end_date=pre_1_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_25_close = None
pre_1_close = None
if str(pre_25_date) == str(pre_25_data.index[0])[:10]:
pre_25_close = pre_25_data['close'][0]
if str(pre_1_date) == str(pre_1_data.index[0])[:10]:
pre_1_close = pre_1_data['close'][0]
if pre_25_close != None and pre_1_close != None and not math.isnan(pre_25_close) and not math.isnan(pre_1_close):
change_pct = (pre_1_close - pre_25_close) / pre_25_close
item = {'code':code, 'change_pct': change_pct}
change_pct_dict_list.append(item)
return change_pct_dict_list
def load_change_pct_data_1(current_dt,code):
change_pct_dict_list = []
# 计算涨跌幅需要用到前一日收盘价,所以需要多加载一天的数据,
# 而这里在第二日的开盘前运行,计算前一个交易日收盘后的数据,所以需要再多加载一天
# 使用固定的25个交易日,而非25个bar计算涨跌幅
count = CHANGE_PCT_DAY_NUMBER + 1
# 获取25个交易日的日期
pre_25_dates = get_trade_days(start_date=None, end_date=current_dt, count=count)
pre_25_date = pre_25_dates[0]
pre_1_date = pre_25_dates[-1]
pre_25_data = get_price(code, start_date=None, end_date=pre_25_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_1_data = get_price(code, start_date=None, end_date=pre_1_date, frequency='daily', fields=['close'], skip_paused=True, fq='post', count=1)
pre_25_close = None
pre_1_close = None
if str(pre_25_date) == str(pre_25_data.index[0])[:10]:
pre_25_close = pre_25_data['close'][0]
if str(pre_1_date) == str(pre_1_data.index[0])[:10]:
pre_1_close = pre_1_data['close'][0]
if pre_25_close != None and pre_1_close != None and not math.isnan(pre_25_close) and not math.isnan(pre_1_close):
change_pct = (pre_1_close - pre_25_close) / pre_25_close
item = {'code':code, 'change_pct': change_pct}
change_pct_dict_list.append(item)
return change_pct_dict_list
# 加载标的的涨跌幅信息
change_pct_dict_list = load_change_pct_data(current_dt,filtered_pe)
# 按照涨跌幅升序排序
change_pct_dict_list = sorted(change_pct_dict_list,key = lambda item:item['change_pct'], reverse=False)
# 取跌幅前10%的股票
change_pct_dict_list = change_pct_dict_list[0:(int(len(change_pct_dict_list)*0.1))]
change_pct_dict_list=[]
for i in tqdm(filtered_pe):
change_pct_dict_list.append(load_change_pct_data_1(current_dt,i))
change_pct_dict_df1=[]
for i in change_pct_dict_list:
if i!=[]:
change_pct_dict_df1.append(i[0])
change_pct_dict_df=pd.DataFrame(change_pct_dict_df1)
1.3财务数据-资产负债率
##资产负债率ratio_liability_vs_assets=总负债total_liability/总资产total_assets
##合并资产负债表
q=query(finance.STK_BALANCE_SHEET.company_name,
finance.STK_BALANCE_SHEET.code,
finance.STK_BALANCE_SHEET.pub_date,
finance.STK_BALANCE_SHEET.start_date,
finance.STK_BALANCE_SHEET.end_date,
finance.STK_BALANCE_SHEET.cash_equivalents,
finance.STK_BALANCE_SHEET.total_assets,
finance.STK_BALANCE_SHEET.total_liability
).filter(finance.STK_BALANCE_SHEET.pub_date>='2019-01-01',finance.STK_BALANCE_SHEET.report_type==0)
df=finance.run_query(q)
df['ratio_liability_vs_assets']=df['total_liability']/df['total_assets']
df['rank_1']=df.sort_values(['pub_date', 'end_date'], ascending=[ False, False]).groupby(['code']).cumcount() + 1
df2=df[df['rank_1']==1]
df2['ratio_liability_vs_assets_seg']=pd.cut(df2['ratio_liability_vs_assets'],[0,0.3,0.4,0.6,0.8,1,10])
pd.value_counts(df2['ratio_liability_vs_assets_seg'])
(0.0, 0.3] 996
(0.4, 0.6] 920
(0.3, 0.4] 517
(0.6, 0.8] 470
(0.8, 1.0] 137
(1.0, 10.0] 8
Name: ratio_liability_vs_assets_seg, dtype: int64
df2['ratio_liability_vs_assets'].describe()
count 3048.000000
mean 0.422930
std 0.266916
min 0.009660
25% 0.251521
50% 0.402005
75% 0.562507
max 8.257912
Name: ratio_liability_vs_assets, dtype: float64
df3=df2[(df2['ratio_liability_vs_assets']<=1)]['ratio_liability_vs_assets']
sns.distplot(df3)
![output_27_1 output_27_1]()
#格式转换
df2['zs1']=df2.ratio_liability_vs_assets_seg.astype(str)
#取资产负债率和25日跌幅
df3=pd.merge(df2,change_pct_dict_df,on='code',how='inner')
df4=df3[['company_name','code','ratio_liability_vs_assets','change_pct']]
# 资产负债率与25日跌幅相关性分析
def plot_corr(df,col_list,threshold=None,plt_size=None,is_annot=True):
"""
df:数据集
col_list:变量list集合
threshold: 相关性设定的阈值
plt_size:图纸尺寸
is_annot:是否显示相关系数值
return :相关性热力图
"""
corr_df = df.loc[:,col_list].corr()
plt.figure(figsize=plt_size)
sns.heatmap(corr_df,annot=is_annot,cmap='rainbow',vmax=1,vmin=-1,mask=np.abs(corr_df)<=threshold)
return plt.show()
# 剔除PE TTM 小于0或大于100
df_pe=df_pe[(df_pe.pe_ratio>=0)&(df_pe.pe_ratio<=100)]
#df4加入pe_ratio
df5=pd.merge(df4,df_pe,on='code',how='inner')
1.4财务数据——流动比率
##流动比率 current_ratio =流动资产合计 total_current_assets/流动负债合计 total_current_liability
##合并资产负债表
q=query(finance.STK_BALANCE_SHEET.company_name,
finance.STK_BALANCE_SHEET.code,
finance.STK_BALANCE_SHEET.pub_date,
finance.STK_BALANCE_SHEET.start_date,
finance.STK_BALANCE_SHEET.end_date,
finance.STK_BALANCE_SHEET.total_current_assets,
finance.STK_BALANCE_SHEET.total_current_liability
).filter(finance.STK_BALANCE_SHEET.pub_date>='2018-01-01',finance.STK_BALANCE_SHEET.report_type==0)
df_current=finance.run_query(q)
df_current['current_ratio']=df_current['total_current_assets']/df_current['total_current_liability']
#取最新一份财报数据
df_current['rank_1']=df_current.sort_values(['pub_date', 'end_date'], ascending=[ False, False]).groupby(['code']).cumcount() + 1
df_current=df_current[df_current.rank_1==1]
# df_current=df_current[['code','current_ratio']].dropna()
df_current=df_current[['code','current_ratio']]
#合并到特征表
df6=pd.merge(df5,df_current,on='code',how='left')
#相关性分析
plot_corr(df6,['ratio_liability_vs_assets','change_pct','market_cap','pe_ratio','current_ratio'])
![output_45_0 output_45_0]()
df_dist=df6[df6.change_pct>0.2]['current_ratio'].dropna()
|
company_name |
code |
ratio_liability_vs_assets |
change_pct |
market_cap |
pe_ratio |
current_ratio |
| 0 |
兰州民百(集团)股份有限公司 |
600738.XSHG |
0.471485 |
-0.230364 |
32.5629 |
6.0867 |
0.972715 |
| 1 |
山东沃华医药科技股份有限公司 |
002107.XSHE |
0.245668 |
0.221279 |
40.8015 |
40.0212 |
NaN |
| 2 |
江西金力永磁科技股份有限公司 |
300748.XSHE |
0.464905 |
-0.204534 |
128.5336 |
81.9638 |
NaN |
| 3 |
青岛农村商业银行股份有限公司 |
002958.XSHE |
0.927440 |
-0.069841 |
319.4444 |
11.3078 |
NaN |
| 4 |
上海永冠众诚新材料科技(集团)股份有限公司 |
603681.XSHG |
0.319237 |
-0.167230 |
36.5502 |
27.2604 |
NaN |
| 5 |
厦门光莆电子股份有限公司 |
300632.XSHE |
0.479532 |
-0.092742 |
44.6479 |
25.7679 |
2.763980 |
| 6 |
奥美医疗用品股份有限公司 |
002950.XSHE |
0.544276 |
1.079046 |
208.6398 |
62.8040 |
NaN |
| 7 |
宁波热电股份有限公司 |
600982.XSHG |
0.410976 |
-0.107203 |
29.1234 |
31.6918 |
2.441460 |
| 8 |
中航重机股份有限公司 |
600765.XSHG |
0.617861 |
-0.055741 |
88.5990 |
27.0453 |
1.193113 |
| 9 |
昆山新莱洁净应用材料股份有限公司 |
300260.XSHE |
0.621686 |
0.023700 |
26.3128 |
49.3646 |
1.528724 |
| 10 |
云南鸿翔一心堂药业(集团)股份有限公司 |
002727.XSHE |
0.449485 |
-0.112977 |
119.8563 |
19.8512 |
1.990085 |
| 11 |
西安蓝晓科技新材料股份有限公司 |
300487.XSHE |
0.478631 |
-0.013625 |
80.9049 |
28.2361 |
2.688508 |
| 12 |
江苏新泉汽车饰件股份有限公司 |
603179.XSHG |
0.587382 |
0.042727 |
50.0095 |
27.0734 |
1.209574 |
| 13 |
苏州迈为科技股份有限公司 |
300751.XSHE |
0.580113 |
0.030505 |
104.0000 |
41.9587 |
NaN |
| 14 |
上海瀚讯信息技术股份有限公司 |
300762.XSHE |
0.402701 |
-0.108459 |
67.7469 |
62.4448 |
NaN |
| 15 |
山东民和牧业股份有限公司 |
002234.XSHE |
0.572745 |
-0.105084 |
83.9388 |
5.6186 |
NaN |
| 16 |
深圳市飞荣达科技股份有限公司 |
300602.XSHE |
0.381029 |
0.210325 |
170.0924 |
47.3213 |
4.174113 |
| 17 |
杭州福斯特应用材料股份有限公司 |
603806.XSHG |
0.139437 |
-0.053945 |
244.7336 |
24.5508 |
6.849038 |
| 18 |
内蒙古伊利实业集团股份有限公司 |
600887.XSHG |
0.411059 |
-0.137787 |
1762.4631 |
25.0969 |
NaN |
| 19 |
上海悦心健康集团股份有限公司 |
002162.XSHE |
0.582459 |
-0.093400 |
28.1746 |
74.3437 |
0.691728 |
| 20 |
深圳市联得自动化装备股份有限公司 |
300545.XSHE |
0.490755 |
-0.073194 |
47.6353 |
59.1575 |
1.943353 |
| 21 |
申万宏源集团股份有限公司 |
000166.XSHE |
0.795252 |
-0.065642 |
1171.8694 |
20.9062 |
NaN |
| 22 |
上海富瀚微电子股份有限公司 |
300613.XSHE |
0.130383 |
-0.095772 |
76.0006 |
84.4895 |
8.161842 |
| 23 |
北京昭衍新药研究中心股份有限公司 |
603127.XSHG |
0.429196 |
0.033820 |
115.8378 |
90.7828 |
1.918646 |
| 24 |
凌源钢铁股份有限公司 |
600231.XSHG |
0.534481 |
-0.141450 |
64.0120 |
18.7002 |
0.793143 |
| 25 |
广东广州日报传媒股份有限公司 |
002181.XSHE |
0.097676 |
-0.161290 |
58.5173 |
69.7058 |
6.912332 |
| 26 |
青岛汇金通电力设备股份有限公司 |
603577.XSHG |
0.548940 |
-0.092017 |
20.1170 |
39.6529 |
1.659044 |
| 27 |
石药集团新诺威制药股份有限公司 |
300765.XSHE |
0.281458 |
-0.014568 |
71.7000 |
26.2908 |
NaN |
| 28 |
东方财富信息股份有限公司 |
300059.XSHE |
0.605756 |
0.105098 |
1165.1581 |
75.0578 |
1.608532 |
| 29 |
中设设计集团股份有限公司 |
603018.XSHG |
0.625911 |
0.016099 |
49.9201 |
11.1978 |
1.443960 |
| ... |
... |
... |
... |
... |
... |
... |
... |
| 2126 |
山东好当家海洋发展股份有限公司 |
600467.XSHG |
0.497659 |
-0.101710 |
32.2880 |
50.5187 |
0.697399 |
| 2127 |
福建福能股份有限公司 |
600483.XSHG |
0.504333 |
-0.017453 |
132.6818 |
10.2428 |
1.695981 |
| 2128 |
天津天药药业股份有限公司 |
600488.XSHG |
0.307980 |
0.215105 |
55.4678 |
34.7858 |
1.644740 |
| 2129 |
烽火通信科技股份有限公司 |
600498.XSHG |
0.632396 |
0.098594 |
351.0612 |
42.1872 |
1.491932 |
| 2130 |
腾达建设集团股份有限公司 |
600512.XSHG |
0.568835 |
0.055225 |
48.9264 |
9.0598 |
2.277506 |
| 2131 |
南京栖霞建设股份有限公司 |
600533.XSHG |
0.775681 |
-0.071714 |
32.6550 |
5.4489 |
2.137236 |
| 2132 |
卓郎智能技术股份有限公司 |
600545.XSHG |
0.584635 |
-0.144033 |
102.3523 |
15.9659 |
2.370016 |
| 2133 |
江苏康缘药业股份有限公司 |
600557.XSHG |
0.296340 |
-0.088373 |
80.7504 |
16.2524 |
1.642700 |
| 2134 |
四川大西洋焊接材料股份有限公司 |
600558.XSHG |
0.275038 |
-0.119258 |
25.8510 |
77.7262 |
1.785506 |
| 2135 |
河北衡水老白干酒业股份有限公司 |
600559.XSHG |
0.465842 |
-0.170685 |
84.9731 |
22.5027 |
NaN |
| 2136 |
北京京能电力股份有限公司 |
600578.XSHG |
0.596164 |
-0.110699 |
183.5112 |
12.9139 |
0.485902 |
| 2137 |
新疆八一钢铁股份有限公司 |
600581.XSHG |
0.794028 |
-0.110823 |
47.8264 |
13.1655 |
0.238045 |
| 2138 |
天地科技股份有限公司 |
600582.XSHG |
0.446959 |
-0.043739 |
126.6408 |
11.6222 |
2.126360 |
| 2139 |
安徽海螺水泥股份有限公司 |
600585.XSHG |
0.194822 |
0.060494 |
2907.1975 |
8.8328 |
2.582034 |
| 2140 |
云赛智联股份有限公司 |
600602.XSHG |
0.229199 |
-0.051843 |
112.5595 |
40.8208 |
2.673322 |
| 2141 |
上海市北高新股份有限公司 |
600604.XSHG |
0.542533 |
-0.158325 |
135.4399 |
67.1801 |
3.477707 |
| 2142 |
山西省国新能源股份有限公司 |
600617.XSHG |
0.849032 |
-0.157553 |
43.4950 |
93.9236 |
NaN |
| 2143 |
上海华谊集团股份有限公司 |
600623.XSHG |
0.543544 |
-0.136203 |
121.4756 |
15.3251 |
1.142360 |
| 2144 |
上海申达股份有限公司 |
600626.XSHG |
0.631430 |
0.046837 |
55.2285 |
13.8919 |
NaN |
| 2145 |
浙报数字文化集团股份有限公司 |
600633.XSHG |
0.126146 |
0.024541 |
130.5830 |
26.5269 |
NaN |
| 2146 |
乐山电力股份有限公司 |
600644.XSHG |
0.525809 |
-0.111629 |
26.5432 |
29.7453 |
0.541703 |
| 2147 |
上海外高桥集团股份有限公司 |
600648.XSHG |
0.612917 |
-0.159757 |
166.6693 |
18.3664 |
NaN |
| 2148 |
北京电子城投资开发集团股份有限公司 |
600658.XSHG |
0.508053 |
-0.072935 |
52.5735 |
21.4715 |
3.305817 |
| 2149 |
上海强生控股股份有限公司 |
600662.XSHG |
0.486859 |
-0.158337 |
40.8705 |
23.0654 |
0.917287 |
| 2150 |
天地源股份有限公司 |
600665.XSHG |
0.841852 |
-0.086732 |
28.1704 |
8.9896 |
1.904141 |
| 2151 |
无锡市太极实业股份有限公司 |
600667.XSHG |
0.597682 |
0.308733 |
253.5853 |
38.5628 |
1.254031 |
| 2152 |
中华企业股份有限公司 |
600675.XSHG |
0.658932 |
-0.138149 |
254.8185 |
6.9895 |
1.284974 |
| 2153 |
上海交运集团股份有限公司 |
600676.XSHG |
0.297939 |
-0.188295 |
39.9055 |
18.4038 |
2.270599 |
| 2154 |
南京新街口百货商店股份有限公司 |
600682.XSHG |
0.359603 |
-0.136468 |
122.6326 |
9.7458 |
0.750177 |
| 2155 |
中国石化上海石油化工股份有限公司 |
600688.XSHG |
0.277847 |
0.005398 |
432.9525 |
18.9153 |
1.818862 |
2156 rows × 7 columns