PostgreSQL 线性回归 - 股价预测 1
背景
本文是一个简单的一元线性回归的例子, 需要了解相关理论知识可参考 :
《在PostgreSQL中用线性回归分析(linear regression) - 实现数据预测》
数据来自股票的收盘价.
如果要做多元分析, 请自行增加维度(例如大盘指数,成交量,最低,最高价,价格区间等等都可以那里做回归计算), 计算期望值.
例子如下 :
按实际顺序录入test, 如时间从小到大, ID从小到大, 最终目标是用前一个值预测下一个值
第一条记录是最老的值, 最后一条记录是最新的值, 不要搞反了.
create table test(id int, cnt numeric);
以下是网友提供的阿里巴巴的收盘价数据
格式 date,close,volume,open,high,low
vi ali.txt
2015/3/3,81.58,39173430,82.95,83.25,80.03
2015/3/2,84,11175870,85,85.02,83.75
2015/2/27,85.12,8362081,85.95,86.56,85
2015/2/26,85.37,8758019,86.92,87.16,85.2
2015/2/25,86.19,13592080,84.38,86.83,84.36
2015/2/24,84.69,15817560,85.53,85.53,83.88
2015/2/23,85.47,9352529,86.51,86.68,85.25
2015/2/20,86.64,7805009,87.25,87.29,86.38
2015/2/19,86.89,7568275,86.81,87.87,86.71
2015/2/18,86.74,7415961,87.1,87.43,86.5
2015/2/17,86.85,15133130,88.78,88.99,86.7
2015/2/13,89.05,14587390,88.2,89.3,87.65
2015/2/12,87.1,15143910,85.6,88.3,85.55
2015/2/11,86,12281340,87.58,87.7,85.82
2015/2/10,87.26,12026260,87.01,87.47,86.52
2015/2/9,86,12088070,85.83,86.75,85.47
2015/2/6,85.68,17597490,87.11,87.4,85.42
2015/2/5,87,28867570,89.58,89.84,86.1
2015/2/4,90,14599400,90.99,91.88,89.48
2015/2/3,90.61,13475970,91.65,91.65,89.9
2015/2/2,90.13,18845140,91.13,91.66,88.61
2015/1/30,89.08,36747370,89.6,92,88.11
2015/1/29,89.81,76430640,90.53,90.74,87.36
2015/1/28,98.45,42012700,100.3,101.49,97.79
2015/1/27,102.94,15591560,102.89,103.57,100.58
2015/1/26,103.99,10656340,104.4,105.13,103.33
2015/1/23,103.11,9860504,104.02,105.2,103.02
2015/1/22,104,11400180,104.6,104.92,103.1
2015/1/21,103.29,15141470,100.75,103.86,100.32
2015/1/20,100.04,12049470,98.3,100.21,97.59
2015/1/16,96.89,13313120,96.09,97.8,95.52
2015/1/15,96.31,18182200,99.67,100.14,96.02
2015/1/14,99.58,17785990,99.28,100.18,98.06
2015/1/13,100.77,11275790,102.59,102.85,100.01
2015/1/12,101.62,7946740,103.2,103.36,101.21
2015/1/9,103.02,10193410,105.24,105.3,102.89
2015/1/8,105.03,12904640,102.95,105.3399,102.68
2015/1/7,102.13,11000010,104.59,104.74,102.03
2015/1/6,103.32,15662710,101.25,103.85,100.11
2015/1/5,101,18313750,102.76,103.02,99.9
2015/1/2,103.6,12286400,104.24,104.72,102.52
2014/12/31,103.94,10254350,106.46,106.47,103.69
2014/12/30,105.75,10185890,105.64,106.71,105.13
2014/12/29,105.98,8059191,105.95,107.66,105.64
2014/12/26,105.95,6523956,105.99,106.94,105.5
2014/12/24,105.95,5870358,105.68,107.21,105.6
2014/12/23,105.52,19077270,108.3,108.47,103.88
2014/12/22,108.77,12985130,110.63,110.98,108.53
2014/12/19,110.65,14785340,109.93,110.65,108.04
2014/12/18,109.25,22749890,110.58,111.2,108.26
2014/12/17,109.02,17242940,107.11,109.19,106.28
2014/12/16,105.77,21655360,103.75,107.68,103.7
2014/12/15,104.7,16476750,106.39,107.77,103.7
2014/12/12,105.11,14504050,104.7,107.45,104.179
2014/12/11,104.97,15658440,104.44,106.84,104.29
2014/12/10,103.88,18398440,107.09,107.38,103.51
2014/12/9,107.48,24805130,102.27,107.95,101.2
2014/12/8,105.07,19166550,105.97,107.4,104.2101
2014/12/5,107.9,12125090,109.6,110.35,107.76
2014/12/4,109.17,10790400,110.1,110.5,108.91
2014/12/3,110.64,16164860,110.4,111.68,108.798
导入数据库
create table tmp(c1 text,c2 numeric,c3 numeric,c4 numeric,c5 numeric,c6 numeric);
copy tmp from '/home/postgres/ali.txt' with (format csv);
调换一下顺序
insert into test select row_number() over(order by ctid desc), c2 from tmp;
校验函数
CREATE OR REPLACE FUNCTION public.check_predict(
IN ov integer, -- 校验哪条记录
OUT rv numeric, -- 真实值
OUT pv numeric, -- 预测值
OUT dev numeric, -- 误差
OUT zv numeric, -- 自变量
OUT v_slope numeric, -- 斜率
OUT v_inter numeric, -- 截距
OUT v_r2 numeric -- 相关性
)
RETURNS record
LANGUAGE plpgsql
AS $function$
declare
r2_1 numeric := 0; -- 相关性
r2_2 numeric := 0; -- 最大相关性
var int; -- 样本起点
inter_1 numeric; -- 截距
slope_1 numeric; -- 斜率
inter_2 numeric; -- 最大相关性截距
slope_2 numeric; -- 最大相关性斜率
realv numeric; -- 真实数据
predicv numeric; -- 预测数据
offset_var int := ov; -- 倒数第?个值的预测值, 不停迭代, 最后计算所有的实际值和预测值的corr, 选择最佳相关?
v_cnt int8 := 0;
v_lmt int := 90; -- 使用的最大样本集, 影响预测准确度
v_min int := 5; -- 使用的最小样本数, 影响预测准确度
zcnt numeric := 0; -- 自变量
begin
select count(*) into v_cnt from test;
-- 自变量 t1
-- 因变量 t2
-- 筛选最大相关度的样本数, 并记录下储斜率, 截距.
for i in 0..v_lmt
loop
with t1 as (select row_number() over(order by id desc) as rn,* from (select id,lag(cnt,1) over(order by id) as zbl_cnt,cnt from test order by id desc offset offset_var limit v_lmt+v_min) t
where t.zbl_cnt is not null
)
select regr_intercept(t1.cnt,t1.zbl_cnt), regr_slope(t1.cnt,t1.zbl_cnt), regr_r2(t1.cnt,t1.zbl_cnt)
into inter_1,slope_1,r2_1
from t1
where t1.rn<=i+v_min;
if r2_1>r2_2 then
inter_2 := inter_1;
slope_2 := slope_1;
r2_2 := r2_1;
var := i+v_min;
end if;
end loop;
raise notice 'offset:%, 最大相关度样本数:%, 截距%, 斜率%, 相关性%', offset_var, var, round(inter_2,4), round(slope_2,4), round(r2_2,4);
select slope_2*cnt+inter_2, cnt into predicv,zcnt from test order by id desc offset offset_var limit 1;
-- offset_var=0的自变量, 表示最近一个值, 所以真实值还不存在, 那么realv=NULL
if offset_var=0 then
realv := NULL;
else
select cnt into realv from test order by id desc offset offset_var-1 limit 1;
end if;
raise notice '自变量%, 真实数据%, 预测数据%, 本次预测偏差%%%', zcnt, realv, round(predicv,2), abs(1-round(predicv/realv,4))*100;
rv := realv;
pv := round(predicv,2);
dev := abs(1-round(predicv/realv,4));
zv := zcnt;
v_slope := slope_2;
v_inter := inter_2;
v_r2 := r2_2;
return;
end;
$function$;
预测结果偏差较大, 看dev这列显示偏差
(如果你的数据包含今天的收盘价格的数据的话, 第一行就是预测的明天的收盘价格)
postgres=# select (check_predict(i)).* from generate_series(0,50) t(i);
rv | pv | dev | zv | v_slope | v_inter | v_r2
--------+--------+--------+--------+--------------------+--------------------+--------------------
| 81.50 | | 81.58 | 0.930196213911424 | 5.61421408650552 | 0.461071168376043
81.58 | 84.90 | 0.0407 | 84 | 0.560745221248429 | 37.8015394128117 | 0.23571694348864
84 | 85.84 | 0.0219 | 85.12 | 0.416036492272315 | 50.428211921701 | 0.160707728559778
85.12 | 86.06 | 0.0110 | 85.37 | 0.31332110091768 | 59.3087633551948 | 0.0916611831863153
85.37 | 86.40 | 0.0121 | 86.19 | 0.223065327998904 | 67.1752208792373 | 0.0892441401437571
86.19 | 85.51 | 0.0078 | 84.69 | 0.509756185614852 | 42.343562005006 | 0.338718526733452
84.69 | 85.97 | 0.0151 | 85.47 | 0.60492702580558 | 34.2628366224153 | 0.408610576552954
85.47 | 86.86 | 0.0163 | 86.64 | 0.641095276611868 | 31.3167837973076 | 0.422562923551744
86.64 | 87.06 | 0.0049 | 86.89 | 0.6396258966744 | 31.4865883689919 | 0.442022365274712
86.89 | 87.31 | 0.0048 | 86.74 | 0.314176461801723 | 60.0558838893752 | 0.345972028763493
86.74 | 87.07 | 0.0038 | 86.85 | 0.556953610508981 | 38.6967242480077 | 0.709038352162874
86.85 | 88.54 | 0.0194 | 89.05 | 0.71646536311035 | 24.7355217235248 | 0.806474667992637
89.05 | 86.82 | 0.0250 | 87.1 | 0.828893551212771 | 14.6233989516049 | 0.843205122436764
87.1 | 85.63 | 0.0169 | 86 | 0.876681721784925 | 10.2362647858593 | 0.868757340937373
86 | 86.79 | 0.0092 | 87.26 | 0.907607969680494 | 7.59324024469571 | 0.872354617595825
87.26 | 85.35 | 0.0219 | 86 | 0.963046324742614 | 2.52920153372814 | 0.854052902081886
86 | 85.04 | 0.0111 | 85.68 | 0.984857752501153 | 0.658717160003272 | 0.818153667442131
85.68 | 86.65 | 0.0113 | 87 | 0.968673034113725 | 2.3751701572406 | 0.782575372168168
87 | 90.00 | 0.0345 | 90 | 0.910991684444145 | 8.01456054235934 | 0.742602997511869
90 | 90.69 | 0.0076 | 90.61 | 0.897821640533335 | 9.33440206466203 | 0.713122343798605
90.61 | 90.13 | 0.0053 | 90.13 | 0.909958318693295 | 8.11836657246885 | 0.676080976394243
90.13 | 88.54 | 0.0177 | 89.08 | 0.959429536023691 | 3.07240205281187 | 0.608007093767562
89.08 | 89.76 | 0.0076 | 89.81 | 0.91029848144619 | 8.00500306348446 | 0.404654669040077
89.81 | 99.27 | 0.1054 | 98.45 | 0.657207082561843 | 34.5727385355619 | 0.393214748683796
98.45 | 102.53 | 0.0415 | 102.94 | 0.704409588700779 | 30.0196224514047 | 0.502462865561183
102.94 | 103.45 | 0.0049 | 103.99 | 0.703297557503838 | 30.3100075437914 | 0.46367875870034
103.99 | 102.47 | 0.0147 | 103.11 | 0.62837962126522 | 37.6738507164757 | 0.403487822887036
103.11 | 103.10 | 0.0001 | 104 | 0.643348564991283 | 36.1926878205066 | 0.412471976813411
104 | 102.47 | 0.0147 | 103.29 | 0.609715827989882 | 39.4938840550046 | 0.434204693264911
103.29 | 100.19 | 0.0300 | 100.04 | 0.714689142171646 | 28.6965783703781 | 0.579882468389968
100.04 | 96.87 | 0.0317 | 96.89 | 0.891706172449747 | 10.4761802869589 | 0.673676693300631
96.89 | 95.65 | 0.0128 | 96.31 | 0.996480964507578 | -0.324693104249417 | 0.614510491219348
96.31 | 99.98 | 0.0381 | 99.58 | 0.805484441868333 | 19.7717089277751 | 0.512838450280928
99.58 | 101.72 | 0.0215 | 100.77 | 0.580782945386486 | 43.1942330487114 | 0.411626746536197
100.77 | 102.31 | 0.0153 | 101.62 | 0.60960923807431 | 40.3630398853694 | 0.534574960836081
101.62 | 103.28 | 0.0163 | 103.02 | 0.715901485259652 | 29.5241464649391 | 0.578753862361246
103.02 | 104.93 | 0.0185 | 105.03 | 0.755513345311285 | 25.5752266330287 | 0.637919333414959
105.03 | 102.17 | 0.0272 | 102.13 | 0.85397742981656 | 14.9562551932185 | 0.706505426646503
102.13 | 103.39 | 0.0123 | 103.32 | 0.816331012523094 | 19.0456461040167 | 0.678069331200632
103.32 | 97.81 | 0.0533 | 101 | 1.59944623063686 | -63.7322919498157 | 0.754678672545085
101 | 103.53 | 0.0251 | 103.6 | 0.863779780883968 | 14.0441886225418 | 0.661109144730258
103.6 | 103.94 | 0.0033 | 103.94 | 0.838387460079692 | 16.7970520107185 | 0.577059699997509
103.94 | 105.92 | 0.0191 | 105.75 | -0.133229605047718 | 120.010159783732 | 0.804055392782976
105.75 | 105.93 | 0.0017 | 105.98 | -0.145112549418295 | 121.311379359051 | 0.969009451994014
105.98 | 105.95 | 0.0003 | 105.95 | 0 | 105.949999994131 | 1
105.95 | 105.89 | 0.0005 | 105.95 | -0.13230769230761 | 119.911107692316 | 0.99999999999768
105.95 | 99.90 | 0.0571 | 105.52 | 1.72872340425854 | -82.5132446808904 | 1.00000000000077
105.52 | 111.29 | 0.0547 | 108.77 | -1.34285714286032 | 257.357142857695 | 1.00000000000725
108.77 | 119.17 | 0.0956 | 110.65 | 6.08695652160159 | -554.349999972815 | 1.00000000000081
110.65 | 109.27 | 0.0125 | 109.25 | 0.0707692307696992 | 101.53473846147 | 0.999999999967684
109.25 | 118.89 | 0.0883 | 109.02 | 3.03738317759831 | -212.244018694157 | 1.00000000001163
(51 rows)
贵州茅台的结果
预测偏差还比较理想(可能有些股票受到的外界因素影响较大, 有些比较小, 因为本文没有引入多元分析, 就这样吧)
postgres=# select (check_predict(i)).* from generate_series(0,50) t(i);
rv | pv | dev | zv | v_slope | v_inter | v_r2
--------+--------+--------+--------+-------------------+-------------------+-------------------
| 155.68 | | 154.75 | 0.749327368763041 | 39.7193487454018 | 0.443299946747894
154.75 | 156.33 | 0.0102 | 155.00 | 0.658983381690956 | 54.1888692212598 | 0.31605703462515
155.00 | 158.61 | 0.0233 | 157.81 | 0.505691375378367 | 78.8096738403756 | 0.240429553874766
157.81 | 159.35 | 0.0097 | 159.02 | 0.471362121189687 | 84.3914502631931 | 0.23632158869318
159.02 | 159.89 | 0.0055 | 159.87 | 0.502631474677655 | 79.5350129389031 | 0.262084379639135
159.87 | 160.46 | 0.0037 | 161.00 | 0.486478989379233 | 82.1377272523488 | 0.29236983925612
161.00 | 161.73 | 0.0045 | 162.74 | 0.592859008065614 | 65.2441140525351 | 0.38051931752417
162.74 | 161.22 | 0.0094 | 162.33 | 0.522125410976863 | 76.4612098979163 | 0.450724600316921
162.33 | 161.07 | 0.0078 | 161.50 | 0.670999115633002 | 52.7013374696206 | 0.535249445391989
161.50 | 160.77 | 0.0045 | 160.92 | 0.72782591656929 | 43.6501486130006 | 0.574758659704234
160.92 | 156.66 | 0.0264 | 156.49 | 0.885275616598876 | 18.1277330183775 | 0.655113491671201
156.49 | 157.82 | 0.0085 | 156.70 | 0.757494701869893 | 39.12388483696 | 0.495319263616962
156.70 | 161.18 | 0.0286 | 160.71 | 0.725555251667536 | 44.5773888438119 | 0.584398418586498
160.71 | 160.13 | 0.0036 | 159.25 | 0.718340613774721 | 45.7320036637828 | 0.626975960572309
159.25 | 158.92 | 0.0021 | 158.02 | 0.776715149169011 | 36.1819821709587 | 0.676670415905896
158.02 | 159.06 | 0.0066 | 158.47 | 0.812642712296025 | 30.2825991865648 | 0.679920408166994
158.47 | 160.77 | 0.0145 | 160.50 | 0.824536957368669 | 28.4277502764865 | 0.708045226118134
160.50 | 161.95 | 0.0090 | 161.90 | 0.843561194042496 | 25.3766280000652 | 0.722573245190375
161.90 | 161.51 | 0.0024 | 161.49 | 0.847987413256958 | 24.5663395825434 | 0.721633674185219
161.49 | 163.60 | 0.0131 | 163.87 | 0.870581601947346 | 20.9406294921026 | 0.725206713695765
163.87 | 163.51 | 0.0022 | 163.70 | 0.833749346623708 | 27.0240967784583 | 0.670383837164832
163.70 | 168.62 | 0.0301 | 167.65 | 1.05254359115864 | -7.83507005049674 | 0.735574587910647
167.65 | 165.25 | 0.0143 | 165.70 | 0.883481284376093 | 18.8595907684336 | 0.566222610315986
165.70 | 163.61 | 0.0126 | 164.70 | 0.760665237060595 | 38.3314388491422 | 0.437037290387292
164.70 | 161.46 | 0.0197 | 162.58 | 0.63926420779486 | 57.5312362089702 | 0.386820980329109
162.58 | 158.80 | 0.0232 | 158.81 | 0.717636038072266 | 44.8353807921515 | 0.569066830110592
158.81 | 158.34 | 0.0030 | 158.20 | 0.761022139545313 | 37.9453143278904 | 0.597076723252649
158.20 | 156.01 | 0.0138 | 156.00 | 0.890859569629593 | 17.0349264413836 | 0.660262576219616
156.00 | 155.91 | 0.0006 | 156.00 | 0.928603251444626 | 11.051262047026 | 0.569697453056979
156.00 | 158.28 | 0.0146 | 157.72 | 0.723658951735034 | 44.1435343246399 | 0.44078135697188
157.72 | 159.30 | 0.0100 | 158.50 | 0.593435970103012 | 65.2440679649604 | 0.394699579585899
158.50 | 159.91 | 0.0089 | 159.27 | 0.632807705774723 | 59.1184436985211 | 0.387382904311161
159.27 | 160.63 | 0.0085 | 160.00 | 0.604419927649386 | 63.9233718732132 | 0.330175855885006
160.00 | 162.13 | 0.0133 | 162.00 | 0.479607769753778 | 84.4368575468047 | 0.257868311166715
162.00 | 159.99 | 0.0124 | 158.74 | 0.624745284091551 | 60.8132213491609 | 0.426844197393141
158.74 | 160.57 | 0.0115 | 159.75 | 0.6106793906001 | 63.0161876923878 | 0.337283942825567
159.75 | 161.95 | 0.0138 | 162.00 | 0.453875422780405 | 88.4214116897916 | 0.235385451808691
162.00 | 162.13 | 0.0008 | 162.20 | 0.514218487026421 | 78.7230165782795 | 0.291003288858034
162.20 | 162.09 | 0.0007 | 162.02 | 0.556324729871488 | 71.9532675373276 | 0.326799171637252
162.02 | 162.16 | 0.0009 | 162.04 | 0.579222185293716 | 68.3028288619664 | 0.344557168377077
162.04 | 162.16 | 0.0007 | 161.81 | 0.556599549002475 | 72.0969606318282 | 0.307924224254188
161.81 | 162.66 | 0.0052 | 162.48 | 0.500693143269678 | 81.305630489949 | 0.298678253677398
162.48 | 162.22 | 0.0016 | 161.96 | 0.568957052943135 | 70.0759551450734 | 0.346462148633283
161.96 | 162.89 | 0.0057 | 163.10 | 0.39281014679101 | 98.8215502236369 | 0.340803249529129
163.10 | 164.15 | 0.0065 | 164.80 | 0.640630057919025 | 58.5763291969098 | 0.536313525971144
164.80 | 163.63 | 0.0071 | 164.00 | 0.7153108267714 | 46.3161409479079 | 0.611327694763799
164.00 | 162.73 | 0.0078 | 163.00 | 0.671618352789207 | 53.2545511273858 | 0.679718050633678
163.00 | 160.66 | 0.0144 | 160.17 | 0.646714009825995 | 57.0713176290233 | 0.804063835532055
160.17 | 159.11 | 0.0066 | 158.29 | 0.769358075679891 | 37.3298385738639 | 0.848976826898375
158.29 | 161.29 | 0.0190 | 160.74 | 0.845694258048787 | 25.3560723887127 | 0.887140705169265
160.74 | 165.13 | 0.0273 | 164.30 | 0.928055617122675 | 12.6470087942632 | 0.910309053795291
(51 rows)