Greenplum 源码安装

6 minute read

背景

数据库规划

1  master - standby  
5  segment(s) - segment mirror(s)  

硬件规划

6台主机

master节点配置(cpu 8核, mem 16G, network 1GB, disk 1*250G)

segments配置

建议规划

所有节点执行

# yum -y install rsync coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp readline-devel zlib zlib-devel openssl openssl-devel pam-devel libxml2-devel libxslt-devel python-devel tcl-devel gcc make smartmontools flex bison perl perl-devel perl-ExtUtils* OpenIPMI-tools openldap openldap-devel logrotate python-py gcc-c++ libevent-devel apr-devel libcurl-devel bzip2-devel libyaml-devel  
  
# vi /etc/sysctl.conf  
kernel.shmmax = 68719476736  
kernel.shmmni = 4096  
kernel.shmall = 4000000000  
kernel.sem = 50100 64128000 50100 1280  
kernel.sysrq = 1  
kernel.core_uses_pid = 1  
kernel.msgmnb = 65536  
kernel.msgmax = 65536  
kernel.msgmni = 2048  
net.ipv4.tcp_syncookies = 1  
net.ipv4.ip_forward = 0  
net.ipv4.conf.default.accept_source_route = 0  
net.ipv4.tcp_tw_recycle = 1  
net.ipv4.tcp_max_syn_backlog = 4096  
net.ipv4.conf.all.arp_filter = 1  
net.ipv4.ip_local_port_range = 1025 65535  
net.core.netdev_max_backlog = 10000  
net.core.rmem_max = 2097152  
net.core.wmem_max = 2097152  
vm.overcommit_memory = 2  
fs.file-max = 7672460  
net.ipv4.netfilter.ip_conntrack_max = 655360  
fs.aio-max-nr = 1048576  
net.ipv4.tcp_keepalive_time = 72   
net.ipv4.tcp_keepalive_probes = 9   
net.ipv4.tcp_keepalive_intvl = 7  
  
# sysctl -p  
  
# vi /etc/security/limits.conf  
* soft nofile 131072    
* hard nofile 131072    
* soft nproc 131072   
* hard nproc 131072  
* soft    memlock unlimited  
* hard    memlock unlimited  
  
# rm -f /etc/security/limits.d/90-nproc.conf  

把所有主机的IP和主机名列到/etc/hosts中。使用真实的主机名。

# vi /etc/hosts  
127.0.0.1  localhost  
xxx.xxx.193.96  digoal193096.zmf  
xxx.xxx.199.92  digoal199092.zmf  
xxx.xxx.200.164  digoal200164.zmf  
xxx.xxx.204.16  digoal204016.zmf  
xxx.xxx.204.63  digoal204063.zmf  
xxx.xxx.209.198  digoal209198.zmf  

文件系统mount option

如果是ext4

ext4 mount option  
noatime,nodiratime,nobarrier,discard,nodelalloc,data=writeback  

如果是xfs

rw,noatime,inode64,allocsize=16m  

如果是ZFS

set zfs:zfs_arc_max=0x600000000   

设置块设备预读大小

# /sbin/blockdev --setra 16384 /dev/xvda1  

创建一个管理greenplum 的用户,这里使用digoal

创建一个目录,放gp软件, 给greenplum管理用户写权限,也可以直接使用用户的HOME目录,例如/home/digoal/greenplum-db-4.3.6.1

所有 segment 节点

{  
创建一个目录,放数据库, 给greenplum管理用户写权限  
# mkdir -p /data01/gpdata  
# chown -R digoal /data01/gpdata  
# chmod -R 700 /data01/gpdata  
}  

master节点执行

{  
# mkdir -p /data01/gpdata/master_pgdata  
# chown -R digoal /data01/gpdata/master_pgdata  
# chmod 700 /data01/gpdata/master_pgdata  
}  

主节点执行

非源码安装:

{  ----------------------------------------------  
  
下载greenplum-db-4.3.6.1-build-2-RHEL5-x86_64.zip  
unzip greenplum-db-4.3.6.1-build-2-RHEL5-x86_64.zip  
  
  
使用普通用户安装  
$ ./greenplum-db-4.3.6.1-build-2-RHEL5-x86_64.bin  
安装到 /home/digoal/greenplum-db-4.3.6.1  
  
$ cd /home/digoal/greenplum-db/bin  
$ ./pg_config   
BINDIR = /home/digoal/greenplum-db-4.3.6.1/bin  
DOCDIR = /home/digoal/greenplum-db-4.3.6.1/doc/postgresql  
HTMLDIR = /home/digoal/greenplum-db-4.3.6.1/doc/postgresql  
INCLUDEDIR = /home/digoal/greenplum-db-4.3.6.1/include  
PKGINCLUDEDIR = /home/digoal/greenplum-db-4.3.6.1/include/postgresql  
INCLUDEDIR-SERVER = /home/digoal/greenplum-db-4.3.6.1/include/postgresql/server  
LIBDIR = /home/digoal/greenplum-db-4.3.6.1/lib  
PKGLIBDIR = /home/digoal/greenplum-db-4.3.6.1/lib/postgresql  
LOCALEDIR = /home/digoal/greenplum-db-4.3.6.1/share/locale  
MANDIR = /home/digoal/greenplum-db-4.3.6.1/man  
SHAREDIR = /home/digoal/greenplum-db-4.3.6.1/share/postgresql  
SYSCONFDIR = /home/digoal/greenplum-db-4.3.6.1/etc/postgresql  
PGXS = /home/digoal/greenplum-db-4.3.6.1/lib/postgresql/pgxs/src/makefiles/pgxs.mk  
CC = gcc -m64  
CPPFLAGS = -D_GNU_SOURCE -I/home/digoal/greenplum-db-4.3.6.1/include  
CFLAGS = -O3 -funroll-loops -fargument-noalias-global -fno-omit-frame-pointer -g -finline-limit=1800 -std=gnu99 -Wall -Wmissing-prototypes -Wpointer-arith -Wendif-labels -Wformat-security -fno-strict-aliasing -fwrapv -Werror -I/home/digoal/greenplum-db-4.3.6.1/include  
CFLAGS_SL = -fpic  
LDFLAGS = -Wl,--as-needed -Wl,-rpath,'/data/pulse2-agent/agents/agent1/work/GPDB-4_3_x-rcbuilds/rhel5_x86_64/4.3.6.1-build-2_output/greenplum-db-4.3.6.1-build-2/lib',--enable-new-dtags -L/home/digoal/greenplum-db-4.3.6.1/lib  
LDFLAGS_SL =   
LIBS = -lpgport -lxml2 -lpam -lrt -lgssapi_krb5 -lcrypt -ldl -lm -L/home/digoal/greenplum-db-4.3.6.1/lib  
VERSION = PostgreSQL 8.2.15  
  
  
$cd /home/digoal/greenplum-db  
  
$cat greenplum_path.sh   
GPHOME=/home/digoal/greenplum-db-4.3.6.1  
  
# Replace with symlink path if it is present and correct  
if [ -h ${GPHOME}/../greenplum-db ]; then  
    GPHOME_BY_SYMLINK=`(cd ${GPHOME}/../greenplum-db/ && pwd -P)`  
    if [ x"${GPHOME_BY_SYMLINK}" = x"${GPHOME}" ]; then  
        GPHOME=`(cd ${GPHOME}/../greenplum-db/ && pwd -L)`/.  
    fi  
    unset GPHOME_BY_SYMLINK  
fi  
PATH=$GPHOME/bin:$GPHOME/ext/python/bin:$PATH  
LD_LIBRARY_PATH=$GPHOME/lib:$GPHOME/ext/python/lib:$LD_LIBRARY_PATH  
PYTHONPATH=$GPHOME/lib/python  
PYTHONHOME=$GPHOME/ext/python  
OPENSSL_CONF=$GPHOME/etc/openssl.cnf  
export GPHOME  
export PATH  
export LD_LIBRARY_PATH  
export PYTHONPATH  
export PYTHONHOME  
export OPENSSL_CONF  
  
添加到用户的环境变量  
$cat greenplum_path.sh >> ~/.bash_profile  
  
$. ~/.bash_profile  
  
}  ----------------------------------------------  

源码安装

{  ----------------------------------------------  
  
$ git clone https://github.com/greenplum-db/gpdb.git  
$ cd gpdb  
$ ./configure --prefix=/home/digoal/gpdb  
$ make  
$ make install  
$ ln -s /home/digoal/gpdb /home/digoal/greenplum-db  
  
$ vi ~/env_gp.sh  
GPHOME=/home/digoal/greenplum-db  
  
# Replace with symlink path if it is present and correct  
if [ -h ${GPHOME}/../greenplum-db ]; then  
    GPHOME_BY_SYMLINK=`(cd ${GPHOME}/../greenplum-db/ && pwd -P)`  
    if [ x"${GPHOME_BY_SYMLINK}" = x"${GPHOME}" ]; then  
        GPHOME=`(cd ${GPHOME}/../greenplum-db/ && pwd -L)`/.  
    fi  
    unset GPHOME_BY_SYMLINK  
fi  
PATH=$GPHOME/bin:$GPHOME/ext/python/bin:$PATH  
LD_LIBRARY_PATH=$GPHOME/lib:$GPHOME/ext/python/lib:$LD_LIBRARY_PATH  
PYTHONPATH=$GPHOME/lib/python  
#PYTHONHOME=$GPHOME/ext/python  
OPENSSL_CONF=$GPHOME/etc/openssl.cnf  
  
export GPHOME  
export PATH  
export LD_LIBRARY_PATH  
export PYTHONPATH  
#export PYTHONHOME  
export OPENSSL_CONF  
export MASTER_DATA_DIRECTORY=/data01/digoal/gpdata/gpseg-1  
export PGHOST=127.0.0.1  
export PGPORT=1921  
export PGUSER=digoal  
export PGDATABASE=postgres  
  
}  ----------------------------------------------  
# easy_install pip  
# pip install paramiko  
# pip install psutil  
# pip install lockfile  

使用gpseginstall将GP软件安装到所有节点

创建主机文件,包括所有节点以及主节点本身

$ vi host  
digoal193096.zmf  
digoal199092.zmf  
digoal200164.zmf  
digoal204016.zmf  
digoal204063.zmf  
digoal209198.zmf  

交换KEY,master使用gp管理用户(digoal)访问所有的segment不需要输入密码,master pub拷贝到所有的segment authorized_keys

$ gpssh-exkeys -f ./host  

安装软件到segment hosts

$gpseginstall -f ./host -u digoal  

初始化数据库

配置文件

$ cp /home/digoal/greenplum-db/docs/cli_help/gpconfigs/gpinitsystem_config ~/  
$ chmod 644 ~/gpinitsystem_config  
$ vi ~/gpinitsystem_config  
  
# FILE NAME: gpinitsystem_config  
  
# Configuration file needed by the gpinitsystem  
  
################################################  
#### REQUIRED PARAMETERS  
################################################  
  
#### Name of this Greenplum system enclosed in quotes.  
ARRAY_NAME="digoal greenplum dw"  
  
#### Naming convention for utility-generated data directories.  
SEG_PREFIX=gpseg  
  
#### Base number by which primary segment port numbers   
#### are calculated.  
PORT_BASE=40000  
  
#### File system location(s) where primary segment data directories   
#### will be created. The number of locations in the list dictate  
#### the number of primary segments that will get created per  
#### physical host (if multiple addresses for a host are listed in   
#### the hostfile, the number of segments will be spread evenly across  
#### the specified interface addresses).  
#### 每台主机安装几个segment,就需要几个目录  
declare -a DATA_DIRECTORY=(/data01/gpdata /data01/gpdata /data01/gpdata /data01/gpdata)  
  
#### OS-configured hostname or IP address of the master host.  
MASTER_HOSTNAME=digoal193096.zmf  
  
#### File system location where the master data directory   
#### will be created.  
MASTER_DIRECTORY=/data01/gpdata/master_pgdata  
  
#### Port number for the master instance.   
MASTER_PORT=1921  
  
#### Shell utility used to connect to remote hosts.  
TRUSTED_SHELL=ssh  
  
#### Maximum log file segments between automatic WAL checkpoints.  
CHECK_POINT_SEGMENTS=64  
  
#### Default server-side character set encoding.  
ENCODING=UTF-8  
  
################################################  
#### OPTIONAL MIRROR PARAMETERS  
################################################  
  
#### Base number by which mirror segment port numbers   
#### are calculated.  
#MIRROR_PORT_BASE=50000  
  
#### Base number by which primary file replication port   
#### numbers are calculated.  
#REPLICATION_PORT_BASE=41000  
  
#### Base number by which mirror file replication port   
#### numbers are calculated.   
#MIRROR_REPLICATION_PORT_BASE=51000  
  
#### File system location(s) where mirror segment data directories   
#### will be created. The number of mirror locations must equal the  
#### number of primary locations as specified in the   
#### DATA_DIRECTORY parameter.  
#declare -a MIRROR_DATA_DIRECTORY=(/data01/gpdata_mirror /data01/gpdata_mirror /data01/gpdata_mirror /data01/gpdata_mirror)  
  
  
################################################  
#### OTHER OPTIONAL PARAMETERS  
################################################  
  
#### Create a database of this name after initialization.  
DATABASE_NAME=digoal  
  
#### Specify the location of the host address file here instead of  
#### with the the -h option of gpinitsystem.  
MACHINE_LIST_FILE=/home/digoal/host  

编辑主机文件,不要包含master, standby,除非master,standby节点也需要当segment node使用.

$vi /home/digoal/host  
digoal193096.zmf  
digoal199092.zmf  
digoal200164.zmf  
digoal204016.zmf  
digoal204063.zmf  
digoal209198.zmf  

初始化数据库

$gpinitsystem -c ./gpinitsystem_config --locale=C --max_connections=48 --shared_buffers=1GB --su_password=digoal  

加入主备节点环境变量

$vi ~/env_gp.sh  
export MASTER_DATA_DIRECTORY=/data01/gpdata/master_pgdata/gpseg-1  
export PGHOST=127.0.0.1  
export PGPORT=1921  
export PGUSER=digoal  
export PGDATABASE=digoal  
  
$. ~/env_gp.sh  
  
$psql -h 127.0.0.1 -p 1921 -U digoal digoal  
psql (8.2.15)  
Type "help" for help.  
digoal=# select version();  
                                                                       version                                                                          
------------------------------------------------------------------------------------------------------------------------------------------------------  
 PostgreSQL 8.2.15 (Greenplum Database 4.3.6.1 build 2) on x86_64-unknown-linux-gnu, compiled by GCC gcc (GCC) 4.4.2 compiled on Oct  1 2015 15:14:22  
(1 row)  
  
digoal=#   create table test(id int primary key, info text, crt_time timestamp) with (OIDS=false, FILLFACTOR=95) DISTRIBUTED BY (id);  
NOTICE:  CREATE TABLE / PRIMARY KEY will create implicit index "test_pkey" for table "test"  
CREATE TABLE  

插入4000万测试数据

digoal=# \timing  
Timing is on.  
  
digoal=# do language plpgsql   
$$  
  
digoal$# declare  
digoal$# begin  
digoal$#   for i in 1..10 loop  
digoal$#     execute 'insert into test select generate_series('||(i-1)*4000000+1||','||i*4000000||'),md5(random()::text),clock_timestamp()';  
digoal$#   end loop;  
digoal$# end;  
digoal$#   
$$  
;  

gp目前不支持do语法,使用函数。

digoal=# create or replace function f_test() returns void as   
$$  
  
digoal$# declare  
digoal$# begin  
digoal$#   for i in 1..10 loop  
digoal$#     execute 'insert into test select generate_series('||(i-1)*4000000+1||','||i*4000000||'),md5(random()::text),clock_timestamp()';  
digoal$#   end loop;  
digoal$# end;  
digoal$#   
$$  
 language plpgsql;  
CREATE FUNCTION  
digoal=# select f_test();  
 f_test   
--------  
   
(1 row)  
  
Time: 179127.416 ms  
digoal=# select count(*) from test;  
  count     
----------  
 40000000  
(1 row)  
Time: 1119.652 ms  

使用9.5 的pgbench测试greenplum oltp性能

vi test.sql  
\setrandom id 1 40000000  
update test set info=info where id=:id;  
  
$/home/digoal/pgsql9.5/bin/pgbench -M prepared -n -r -P 1 -f ./test.sql -c 1 -j 1 -T 1000 -h 127.0.0.1 -p 1921 -U digoal digoal  
progress: 1.0 s, 25.0 tps, lat 38.682 ms stddev 42.391  
progress: 2.0 s, 31.0 tps, lat 32.264 ms stddev 26.702  
progress: 3.0 s, 34.0 tps, lat 29.486 ms stddev 7.113  
progress: 4.0 s, 36.0 tps, lat 27.873 ms stddev 5.031  
progress: 5.0 s, 30.0 tps, lat 32.921 ms stddev 9.209  
progress: 6.0 s, 36.0 tps, lat 28.026 ms stddev 5.132  
progress: 7.0 s, 29.0 tps, lat 34.364 ms stddev 30.162  
progress: 8.0 s, 38.0 tps, lat 26.911 ms stddev 4.510  
progress: 9.0 s, 35.0 tps, lat 28.445 ms stddev 15.298  
  
  
$/home/digoal/pgsql9.5/bin/pgbench -M prepared -n -r -P 1 -f ./test.sql -c 16 -j 16 -T 1000 -h 127.0.0.1 -p 1921 -U digoal digoal  
progress: 1.0 s, 20.0 tps, lat 558.569 ms stddev 159.308  
progress: 2.0 s, 35.0 tps, lat 467.861 ms stddev 28.536  
progress: 3.0 s, 38.0 tps, lat 422.836 ms stddev 29.095  
progress: 4.0 s, 27.0 tps, lat 550.747 ms stddev 103.408  
progress: 5.0 s, 31.0 tps, lat 577.198 ms stddev 144.169  
progress: 6.0 s, 36.0 tps, lat 437.262 ms stddev 13.159  
progress: 7.0 s, 35.0 tps, lat 456.761 ms stddev 28.191  
progress: 8.0 s, 33.0 tps, lat 461.790 ms stddev 43.644  
progress: 9.0 s, 36.0 tps, lat 470.014 ms stddev 29.238  
progress: 10.0 s, 42.0 tps, lat 391.530 ms stddev 15.373  
  
vi test.sql  
\setrandom id 1 40000000  
select * from test where id=:id;  

性能差不多。

修改配置的方法举例

$gpconfig -c optimizer -v on  
$gpstop -u  

Flag Counter

digoal’s 大量PostgreSQL文章入口