Deploying HAWQ on CentOS 7.x x64

Background

Deploying HAWQ on CentOS 6.x, whether from source or from binaries, is rather painful: the packages that ship with CentOS 6 are quite old, while HAWQ depends on comparatively recent versions of its toolchain.

(See the earlier article 《CentOS 6.x 部署HAWQ》, "Deploying HAWQ on CentOS 6.x".)

Deploying HAWQ on 7.x is much easier.

This article briefly walks through a single-node, from-source deployment of HAWQ on CentOS 7.x.

Install the EPEL yum repository for CentOS 7.x

http://fedoraproject.org/wiki/EPEL

# wget https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm

# rpm -ivh epel-release-latest-7.noarch.rpm

# yum makecache

Install dependencies from the yum repositories

# yum install -y man passwd sudo tar which git mlocate links make bzip2 net-tools \
  autoconf automake libtool m4 gcc gcc-c++ gdb bison flex gperf maven indent \
  libuuid-devel krb5-devel libgsasl-devel expat-devel libxml2-devel \
  perl-ExtUtils-Embed pam-devel python-devel libcurl-devel snappy-devel \
  thrift-devel libyaml-devel libevent-devel bzip2-devel openssl-devel \
  openldap-devel protobuf-devel readline-devel net-snmp-devel apr-devel \
  libesmtp-devel xerces-c-devel python-pip json-c-devel libhdfs3-devel \
  apache-ivy java-1.7.0-openjdk-devel \
  openssh-clients openssh-server  

# pip install --upgrade pip

# pip --retries=50 --timeout=300 install --upgrade paramiko pycrypto

# yum install -y lcov

install cmake

# useradd gpadmin
# su - gpadmin
$ mkdir -p /home/gpadmin/app

cd ~
wget https://cmake.org/files/v3.6/cmake-3.6.1.tar.gz
tar -zxvf cmake-3.6.1.tar.gz
cd cmake-3.6.1
./configure --parallel=32 --prefix=/home/gpadmin/app/cmake
make -j 32
make install
export PATH=/home/gpadmin/app/cmake/bin:$PATH
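
The PATH export above only affects the current shell; it is added to ~/.bash_profile permanently later on. A quick sanity check that the freshly built cmake is the one being picked up:

$ which cmake
/home/gpadmin/app/cmake/bin/cmake
$ cmake --version
cmake version 3.6.1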

Configure the OS

Core files directory

# mkdir /data01/corefiles
# chmod 777 /data01/corefiles

Kernel parameters

vi /etc/sysctl.conf
fs.aio-max-nr = 1048576
fs.file-max = 76724600
kernel.core_pattern= /data01/corefiles/core_%e_%u_%t_%s.%p         
kernel.sem = 4096 2147483647 2147483646 512000    
kernel.shmall = 107374182      
kernel.shmmax = 274877906944   
kernel.shmmni = 819200         
net.core.netdev_max_backlog = 10000
net.core.rmem_default = 262144       
net.core.rmem_max = 4194304          
net.core.wmem_default = 262144       
net.core.wmem_max = 4194304          
net.core.somaxconn = 4096
net.ipv4.tcp_max_syn_backlog = 4096
net.ipv4.tcp_keepalive_intvl = 20
net.ipv4.tcp_keepalive_probes = 3
net.ipv4.tcp_keepalive_time = 60
net.ipv4.tcp_mem = 8388608 12582912 16777216
net.ipv4.tcp_fin_timeout = 5
net.ipv4.tcp_synack_retries = 2
net.ipv4.tcp_syncookies = 1    
net.ipv4.tcp_timestamps = 1    
net.ipv4.tcp_tw_recycle = 0    
net.ipv4.tcp_tw_reuse = 1      
net.ipv4.tcp_max_tw_buckets = 262144
net.ipv4.tcp_rmem = 8192 87380 16777216
net.ipv4.tcp_wmem = 8192 65536 16777216
vm.dirty_background_bytes = 4096000000       
vm.dirty_expire_centisecs = 6000             
vm.dirty_ratio = 80                          
vm.dirty_writeback_centisecs = 50            
vm.min_free_kbytes = 2097152
vm.mmap_min_addr = 65536
vm.overcommit_memory = 0     
vm.overcommit_ratio = 90     
vm.swappiness = 0            
vm.zone_reclaim_mode = 0     
net.ipv4.ip_local_port_range = 40000 65535    

sysctl -p
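
Spot-check that the settings took effect (values should match the file above):

# sysctl -n kernel.shmmax
274877906944
# sysctl -n vm.swappiness
0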

Resource limits

rm -f /etc/security/limits.conf
rm -f /etc/security/limits.d/*.conf

vi /etc/security/limits.d/hawq.conf
* soft    nofile  1024000
* hard    nofile  1024000
* soft    nproc   unlimited
* hard    nproc   unlimited
* soft    core    unlimited
* hard    core    unlimited
* soft    memlock unlimited
* hard    memlock unlimited
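
The limits only apply to new login sessions. After logging back in as gpadmin, a quick check (values per the file above):

$ ulimit -n
1024000
$ ulimit -u
unlimited
$ ulimit -c
unlimited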

Install R from yum

The R version that ships with CentOS 7 is reasonably recent, so if you want to save effort you can simply install it with yum.

yum install -y R R-devel

JDK

http://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html

$ cd ~

Download the Java SE Development Kit 8u102 package (Linux x64, 173.03 MB): jdk-8u102-linux-x64.tar.gz

$ tar -zxvf jdk-8u102-linux-x64.tar.gz
$ mv jdk1.8.0_102 /home/gpadmin/app/

$ export JAVA_HOME=/home/gpadmin/app/jdk1.8.0_102
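
Like the cmake PATH entry, this export is session-local and is added to ~/.bash_profile later. A quick check that the JDK is usable (only the first line of output is shown; the rest is the usual runtime/VM banner):

$ $JAVA_HOME/bin/java -version
java version "1.8.0_102"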

hadoop

https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-common/SingleCluster.html

http://apache.fayea.com/hadoop/common/stable/

$ cd ~
$ wget http://apache.fayea.com/hadoop/common/stable/hadoop-2.7.3.tar.gz
$ tar -zxvf hadoop-2.7.3.tar.gz
$ mv hadoop-2.7.3 /home/gpadmin/app/
$ cd /home/gpadmin/app/hadoop-2.7.3

$ bin/hadoop
Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]
  CLASSNAME            run the class named CLASSNAME
 or
  where COMMAND is one of:
  fs                   run a generic filesystem user client
  version              print the version
  jar <jar>            run a jar file
                       note: please use "yarn jar" to launch
                             YARN applications, not this command.
  checknative [-a|-h]  check native hadoop and compression libraries availability
  distcp <srcurl> <desturl> copy file or directories recursively
  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive
  classpath            prints the class path needed to get the
                       Hadoop jar and the required libraries
  credential           interact with credential providers
  daemonlog            get/set the log level for each daemon
  trace                view and modify Hadoop tracing settings

Most commands print help when invoked w/o parameters.

hdfs

Take care to keep the HDFS listen port aligned with the port HAWQ is configured to use.
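
Concretely, the port in fs.defaultFS (core-site.xml, below) and the port in hawq_dfs_url (hawq-site.xml, further down) must agree; in this walkthrough both use 8020:

fs.defaultFS  (etc/hadoop/core-site.xml)  ->  hdfs://localhost:8020
hawq_dfs_url  (hawq etc/hawq-site.xml)    ->  localhost:8020/hawq_default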

The directories also need the correct ownership and permissions.

su - root
# mkdir -p /data01/gpadmin/hadoop/tmp
# mkdir -p /data01/gpadmin/hadoop/sock
# mkdir -p /data01/gpadmin/hadoop/dfs/name
# mkdir -p /data01/gpadmin/hadoop/dfs/data
# chown -R gpadmin:gpadmin /data01/gpadmin

Pay special attention to these two ownership/permission settings:
# chown gpadmin:root /data01/gpadmin/hadoop/sock
# chmod 755 /data01/gpadmin/hadoop/sock
# chown root:root /

# su - gpadmin
$ vi ~/.bash_profile
export PATH=/home/gpadmin/app/hadoop-2.7.3/bin:/home/gpadmin/app/hadoop-2.7.3/sbin:/home/gpadmin/app/cmake/bin:$PATH
export JAVA_HOME=/home/gpadmin/app/jdk1.8.0_102
export HADOOP_HOME=/home/gpadmin/app/hadoop-2.7.3

. ~/.bash_profile
cd ~ 

Configuration files

vi etc/hadoop/core-site.xml

<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://localhost:8020</value>
    </property>
    <property>
        <name>ipc.client.connection.maxidletime</name>
        <value>3600000</value>
    </property>
    <property>
        <name>ipc.client.connect.timeout</name>
        <value>300000</value>
    </property>
    <property>
        <name>ipc.server.listen.queue.size</name>
        <value>3300</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/data01/gpadmin/hadoop/tmp</value>
        <description>Abase for other temporary   directories.</description>
    </property>
</configuration>

vi etc/hadoop/hdfs-site.xml

<configuration>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.allow.truncate</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.block.access.token.enable</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.block.local-path-access.user</name>
        <value>gpadmin</value>
    </property>
    <property>
        <name>dfs.client.socket-timeout</name>
        <value>300000000</value>
    </property>
    <property>
        <name>dfs.client.use.legacy.blockreader.local</name>
        <value>false</value>
    </property>
    <property>
        <name>dfs.datanode.data.dir.perm</name>
        <value>750</value>
    </property>
    <property>
        <name>dfs.datanode.handler.count</name>
        <value>60</value>
    </property>
    <property>
        <name>dfs.datanode.max.transfer.threads</name>
        <value>40960</value>
    </property>
    <property>
        <name>dfs.datanode.socket.write.timeout</name>
        <value>7200000</value>
    </property>
    <property>
        <name>dfs.namenode.accesstime.precision</name>
        <value>0</value>
    </property>
    <property>
        <name>dfs.namenode.handler.count</name>
        <value>600</value>
    </property>
    <property>
        <name>dfs.support.append</name>
        <value>true</value>
    </property>
    <property>
         <name>dfs.namenode.name.dir</name>
         <value>file:/data01/gpadmin/hadoop/dfs/name</value>
    </property>
    <property>
         <name>dfs.datanode.data.dir</name>
         <value>file:/data01/gpadmin/hadoop/dfs/data</value>
    </property>
    <property>
        <name>dfs.client.read.shortcircuit</name>
        <value>true</value>
    </property>
    <property>
        <name>dfs.domain.socket.path</name>
        <value>/data01/gpadmin/hadoop/sock/dn._PORT</value>
    </property>
</configuration>

Passwordless SSH (key-based authentication)

$ cd ~
$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
$ chmod 0600 ~/.ssh/authorized_keys
$ ssh localhost date
Wed Aug 31 10:56:24 CST 2016

Format the namenode

$ hdfs namenode -format

Configure the hadoop environment file

$ vi ~/app/hadoop-2.7.3/etc/hadoop/hadoop-env.sh
#export JAVA_HOME=${JAVA_HOME}
export JAVA_HOME=/home/gpadmin/app/jdk1.8.0_102
export HADOOP_HOME=/home/gpadmin/app/hadoop-2.7.3

Start HDFS

$ start-dfs.sh

$ hdfs dfsadmin -report

Configured Capacity: 210955796480 (196.47 GB)
Present Capacity: 192722915328 (179.49 GB)
DFS Remaining: 192722907136 (179.49 GB)
DFS Used: 8192 (8 KB)
DFS Used%: 0.00%
Under replicated blocks: 0
Blocks with corrupt replicas: 0
Missing blocks: 0
Missing blocks (with replication factor 1): 0

-------------------------------------------------
Live datanodes (1):

Name: 127.0.0.1:50010 (localhost)
Hostname: localhost
Decommission Status : Normal
Configured Capacity: 210955796480 (196.47 GB)
DFS Used: 8192 (8 KB)
Non DFS Used: 18232881152 (16.98 GB)
DFS Remaining: 192722907136 (179.49 GB)
DFS Used%: 0.00%
DFS Remaining%: 91.36%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Sat Sep 03 08:30:54 CST 2016



View the HDFS NameNode web UI at http://xxx.xxx.xxx.xxx:50070/
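
Besides the web UI, jps (shipped with the JDK; it also lists itself) should show the three HDFS daemons; a quick check, the process IDs will of course differ:

$ $JAVA_HOME/bin/jps
18910 NameNode
19640 DataNode
19869 SecondaryNameNode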

test hdfs

$ hdfs dfs -mkdir -p /user
$ hdfs dfs -put /home/gpadmin/hadoop-2.7.3.tar.gz /user/
$ hdfs dfs -ls -R /
-rw-r--r--   1 gpadmin supergroup  214092195 2016-09-02 22:09 /user/hadoop-2.7.3.tar.gz

Allow gpadmin to access HDFS

$ /home/gpadmin/app/hadoop-2.7.3/bin/hdfs dfs -chown gpadmin hdfs://localhost:8020/

yarn

YARN setup is only needed if you want YARN to act as the global resource manager for HAWQ; otherwise this section can be skipped.
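
For reference, switching HAWQ over to YARN mode later only requires pointing a few properties in hawq-site.xml (shown in full further down; this article keeps hawq_global_rm_type at 'none') at the ResourceManager:

<property>
    <name>hawq_global_rm_type</name>
    <value>yarn</value>
</property>
<property>
    <name>hawq_rm_yarn_address</name>
    <value>localhost:8032</value>
</property>
<property>
    <name>hawq_rm_yarn_scheduler_address</name>
    <value>localhost:8030</value>
</property>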

$ cd ~/app/hadoop-2.7.3
$ cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml

$ vi etc/hadoop/mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>

$ vi etc/hadoop/yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>

$ start-yarn.sh

View the YARN ResourceManager web UI at http://xxx.xxx.xxx.xxx:8088/

install hawq

cd ~
git clone https://git-wip-us.apache.org/repos/asf/incubator-hawq.git

CODE_BASE=`pwd`/incubator-hawq
  
cd ~
wget ftp://ftp.gnu.org/gnu/gsasl/libgsasl-1.8.0.tar.gz
tar -zxvf libgsasl-1.8.0.tar.gz
cd libgsasl-1.8.0
./configure --prefix=/home/gpadmin/app/sasl
make -j 32 
make install

cd ~
git clone https://github.com/google/googletest
cd googletest
mkdir build
cd build
cmake -DCMAKE_INSTALL_PREFIX:PATH=/home/gpadmin/app/google ..
make -j 32
make install


cd ~
cd $CODE_BASE/depends/libhdfs3

# alternatively: git clone https://github.com/Pivotal-Data-Attic/pivotalrd-libhdfs3

mkdir build
cd build
../bootstrap --prefix=/home/gpadmin/app/libhdfs3 --dependency=/home/gpadmin/app/sasl:/home/gpadmin/app/google
make -j 32
make install

YARN

libyarn

cd ~
cd $CODE_BASE/depends/libyarn
mkdir build
cd build
../bootstrap --prefix=/home/gpadmin/app/libyarn --dependency=/home/gpadmin/app/sasl:/home/gpadmin/app/google
make -j 32
make install

Build HAWQ

cd ~
cd $CODE_BASE

CPPFLAGS="-I/home/gpadmin/app/libyarn/include -I/home/gpadmin/app/hadoop-2.7.3/include -I/home/gpadmin/app/libhdfs3/include" \
LDFLAGS="-L/home/gpadmin/app/libyarn/lib -L/home/gpadmin/app/hadoop-2.7.3/lib -L/home/gpadmin/app/libhdfs3/lib" \
./configure --prefix=/home/gpadmin/app/hawq \
  --with-python --with-r --with-pgcrypto --with-openssl \
  --enable-debug --enable-orca \
  --without-libyarn --without-libhdfs3 --without-thrift

# Run command to build and install
# To build concurrently , run make with -j option. For example, make -j8
# On Linux system without large memory, you will probably encounter errors like
# "Error occurred during initialization of VM" and/or "Could not reserve enough space for object heap"
# and/or "out of memory", try to set vm.overcommit_memory = 1 temporarily, and/or avoid "-j" build,
# and/or add more memory and then rebuild.
# On mac os, you will probably see this error: "'openssl/ssl.h' file not found".
# "brew link openssl --force" should be able to solve the issue.
make -j 32
  
# Install HAWQ
make install
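
A quick sanity check that the binaries landed under the prefix (the exact version string depends on the build; the PostgreSQL 8.2.15 code base it is built on also shows up in psql later):

$ /home/gpadmin/app/hawq/bin/postgres -V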

Configure environment variables

cd ~
vi .bash_profile

. /home/gpadmin/app/hawq/greenplum_path.sh
export PATH=/home/gpadmin/app/hadoop-2.7.3/bin:/home/gpadmin/app/hadoop-2.7.3/sbin:/home/gpadmin/app/cmake/bin:$PATH
export JAVA_HOME=/home/gpadmin/app/jdk1.8.0_102
export HADOOP_HOME=/home/gpadmin/app/hadoop-2.7.3
export LD_LIBRARY_PATH=/home/gpadmin/app/libyarn/lib:/home/gpadmin/app/libhdfs3/lib:/home/gpadmin/app/google/lib:/home/gpadmin/app/sasl/lib:$LD_LIBRARY_PATH
export PGHOST=127.0.0.1
export PGPORT=1921
export PGDATABASE=postgres
export PGUSER=gpadmin
export PGPASSWORD=gpadmin
export PGDATA=/data01/gpadmin/pgdata_master

. ./.bash_profile

Configure greenplum_path.sh (otherwise pg_ctl invoked over remote ssh may report an "fgets failure" error)

vi /home/gpadmin/app/hawq/greenplum_path.sh
Append the following:


export PATH=/home/gpadmin/app/hadoop-2.7.3/bin:/home/gpadmin/app/hadoop-2.7.3/sbin:/home/gpadmin/app/cmake/bin:$PATH
export JAVA_HOME=/home/gpadmin/app/jdk1.8.0_102
export HADOOP_HOME=/home/gpadmin/app/hadoop-2.7.3
export LD_LIBRARY_PATH=/home/gpadmin/app/libyarn/lib:/home/gpadmin/app/libhdfs3/lib:/home/gpadmin/app/google/lib:/home/gpadmin/app/sasl/lib:$LD_LIBRARY_PATH
export PGHOST=127.0.0.1
export PGPORT=1921
export PGDATABASE=postgres
export PGUSER=gpadmin
export PGPASSWORD=gpadmin
export PGDATA=/data01/gpadmin/pgdata_master

Create the directories for initializing the database cluster

$ mkdir -p /data01/gpadmin/pgdata_master
$ mkdir -p /data01/gpadmin/pgdata_segment
$ mkdir -p /data01/gpadmin/pgdata_master_tmp
$ mkdir -p /data01/gpadmin/pgdata_segment_tmp

Edit the HAWQ configuration file (settings in hawq-site.xml take precedence over postgresql.conf)

cd ~/app/hawq


vi etc/hawq-site.xml

<configuration>
        <property>
                <name>hawq_master_address_host</name>
                <value>0.0.0.0</value>
                <description>The host name of hawq master.</description>
        </property>

        <property>
                <name>hawq_master_address_port</name>
                <value>1921</value>
                <description>The port of hawq master.</description>
        </property>

        <property>
                <name>hawq_standby_address_host</name>
                <value>none</value>
                <description>The host name of hawq standby master.</description>
        </property>

        <property>
                <name>hawq_segment_address_port</name>
                <value>40000</value>
                <description>The port of hawq segment.</description>
        </property>

        <property>
                <name>hawq_dfs_url</name>
                <value>localhost:8020/hawq_default</value>
                <description>URL for accessing HDFS.</description>
        </property>

        <property>
                <name>hawq_master_directory</name>
                <value>/data01/gpadmin/pgdata_master</value>
                <description>The directory of hawq master.</description>
        </property>

        <property>
                <name>hawq_segment_directory</name>
                <value>/data01/gpadmin/pgdata_segment</value>
                <description>The directory of hawq segment.</description>
        </property>

        <property>
                <name>hawq_master_temp_directory</name>
                <value>/data01/gpadmin/pgdata_master_tmp</value>
                <description>The temporary directory reserved for hawq master.</description>
        </property>

        <property>
                <name>hawq_segment_temp_directory</name>
                <value>/data01/gpadmin/pgdata_segment_tmp</value>
                <description>The temporary directory reserved for hawq segment.</description>
        </property>

        <property>
                <name>hawq_global_rm_type</name>
                <value>none</value>
                <description>The resource manager type to start for allocating resource.
                                         'none' means hawq resource manager exclusively uses whole
                                         cluster; 'yarn' means hawq resource manager contacts YARN
                                         resource manager to negotiate resource.
                </description>
        </property>

        <property>
                <name>hawq_rm_memory_limit_perseg</name>
                <value>64GB</value>
                <description>The limit of memory usage in a hawq segment when
                                         hawq_global_rm_type is set 'none'.
                </description>
        </property>

        <property>
                <name>hawq_rm_nvcore_limit_perseg</name>
                <value>16</value>
                <description>The limit of virtual core usage in a hawq segment when
                                         hawq_global_rm_type is set 'none'.
                </description>
        </property>

        <property>
                <name>hawq_rm_yarn_address</name>
                <value>localhost:8032</value>
                <description>The address of YARN resource manager server.</description>
        </property>

        <property>
                <name>hawq_rm_yarn_scheduler_address</name>
                <value>localhost:8030</value>
                <description>The address of YARN scheduler server.</description>
        </property>

        <property>
                <name>hawq_rm_yarn_queue_name</name>
                <value>default</value>
                <description>The YARN queue name to register hawq resource manager.</description>
        </property>

        <property>
                <name>hawq_rm_yarn_app_name</name>
                <value>hawq</value>
                <description>The application name to register hawq resource manager in YARN.</description>
        </property>

        <property>
                <name>hawq_re_cpu_enable</name>
                <value>false</value>
                <description>The control to enable/disable CPU resource enforcement.</description>
        </property>

        <property>
                <name>hawq_re_cgroup_mount_point</name>
                <value>/sys/fs/cgroup</value>
                <description>The mount point of CGroup file system for resource enforcement.
                                         For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system.
                </description>
        </property>

        <property>
                <name>hawq_re_cgroup_hierarchy_name</name>
                <value>hawq</value>
                <description>The name of the hierarchy to accomodate CGroup directories/files for resource enforcement.
                                         For example, /sys/fs/cgroup/cpu/hawq for CPU sub-system.
                </description>
        </property>

        <property>
                <name>default_hash_table_bucket_number</name>
                <value>1</value>
        </property>

</configuration>

Initialize the cluster

[gpadmin@digoal ~]$ hawq init cluster --locale=C --shared_buffers=256MB
20160903:21:02:02:029744 hawq_init:digoal:gpadmin-[INFO]:-Prepare to do 'hawq init'
20160903:21:02:02:029744 hawq_init:digoal:gpadmin-[INFO]:-You can find log in:
20160903:21:02:02:029744 hawq_init:digoal:gpadmin-[INFO]:-/home/gpadmin/hawqAdminLogs/hawq_init_20160903.log
20160903:21:02:02:029744 hawq_init:digoal:gpadmin-[INFO]:-GPHOME is set to:
20160903:21:02:02:029744 hawq_init:digoal:gpadmin-[INFO]:-/home/gpadmin/app/hawq
20160903:21:02:02:029744 hawq_init:digoal:gpadmin-[INFO]:-Init hawq with args: ['init', 'cluster']

Continue with HAWQ init Yy|Nn (default=N):
> y
20160903:21:02:03:029744 hawq_init:digoal:gpadmin-[INFO]:-No standby host configured, skip it
20160903:21:02:03:029744 hawq_init:digoal:gpadmin-[INFO]:-Check if hdfs path is available
20160903:21:02:03:029744 hawq_init:digoal:gpadmin-[WARNING]:-WARNING:'hdfs://localhost:8020/hawq_default' does not exist, create it ...
2016-09-03 21:02:03.692149, p29851, th140383720921408, WARNING the number of nodes in pipeline is 1 [localhost(127.0.0.1)], is less than the expected number of replica 3 for block [block pool ID: BP-1170303990-127.0.0.1-1472824907274 block ID 1073741844_1024] file /hawq_default/testFile
20160903:21:02:03:029744 hawq_init:digoal:gpadmin-[INFO]:-1 segment hosts defined
20160903:21:02:03:029744 hawq_init:digoal:gpadmin-[INFO]:-Set default_hash_table_bucket_number as: 1
20160903:21:02:04:029744 hawq_init:digoal:gpadmin-[INFO]:-Start to init master node: '0.0.0.0'
20160903:21:02:22:029744 hawq_init:digoal:gpadmin-[INFO]:-20160903:21:02:22:029973 hawqinit.sh:digoal:gpadmin-[INFO]:-Loading hawq_toolkit...
20160903:21:02:22:029744 hawq_init:digoal:gpadmin-[INFO]:-Master init successfully
20160903:21:02:22:029744 hawq_init:digoal:gpadmin-[INFO]:-Init segments in list: ['localhost']
20160903:21:02:22:029744 hawq_init:digoal:gpadmin-[INFO]:-Total segment number is: 1
.20160903:21:02:22:029744 hawq_init:digoal:gpadmin-[INFO]:-20160903:21:02:22:030426 hawqinit.sh:digoal:gpadmin-[ERROR]:-Postgres initdb failed
20160903:21:02:22:030426 hawqinit.sh:digoal:gpadmin-[ERROR]:-Segment init failed on digoal.com
20160903:21:02:22:029744 hawq_init:digoal:gpadmin-[ERROR]:-HAWQ init failed on localhost
.
20160903:21:02:24:029744 hawq_init:digoal:gpadmin-[INFO]:-0 of 1 segments init successfully
20160903:21:02:24:029744 hawq_init:digoal:gpadmin-[ERROR]:-Segments init failed, exit

If the master init succeeds but the segment init fails, check the log to find the cause and then initialize the segment directly.

20160903:08:53:00:023687 hawq_init:digoal:gpadmin-[INFO]:-20160903:08:53:00:023915 hawqinit.sh:digoal:gpadmin-[INFO]:-Loading hawq_toolkit...
20160903:08:53:00:023687 hawq_init:digoal:gpadmin-[INFO]:-Master init successfully
20160903:08:53:00:023687 hawq_init:digoal:gpadmin-[INFO]:-Init segments in list: ['localhost']
20160903:08:53:00:023687 hawq_init:digoal:gpadmin-[INFO]:-Total segment number is: 1
fgets failure: Success
The program "postgres" is needed by initdb but was either not found in the same directory as "/home/gpadmin/app/hawq/bin/initdb" or failed unexpectedly.
Check your installation; "postgres -V" may have more information.
20160903:08:53:00:024177 hawqinit.sh:digoal:gpadmin-[ERROR]:-Postgres initdb failed
20160903:08:53:00:024177 hawqinit.sh:digoal:gpadmin-[ERROR]:-Segment init failed on digoal.com
20160903:08:53:00:023687 hawq_init:digoal:gpadmin-[INFO]:-20160903:08:53:00:024177 hawqinit.sh:digoal:gpadmin-[ERROR]:-Postgres initdb failed
20160903:08:53:00:024177 hawqinit.sh:digoal:gpadmin-[ERROR]:-Segment init failed on digoal.com
20160903:08:53:00:023687 hawq_init:digoal:gpadmin-[ERROR]:-HAWQ init failed on localhost
20160903:08:53:02:023687 hawq_init:digoal:gpadmin-[INFO]:-0 of 1 segments init successfully
20160903:08:53:02:023687 hawq_init:digoal:gpadmin-[ERROR]:-Segments init failed, exit

Manually initialize the segment

[gpadmin@digoal ~]$ hawq init segment
20160903:17:45:23:026080 hawq_init:digoal:gpadmin-[INFO]:-Prepare to do 'hawq init'
20160903:17:45:23:026080 hawq_init:digoal:gpadmin-[INFO]:-You can find log in:
20160903:17:45:23:026080 hawq_init:digoal:gpadmin-[INFO]:-/home/gpadmin/hawqAdminLogs/hawq_init_20160903.log
20160903:17:45:23:026080 hawq_init:digoal:gpadmin-[INFO]:-GPHOME is set to:
20160903:17:45:23:026080 hawq_init:digoal:gpadmin-[INFO]:-/home/gpadmin/app/hawq
20160903:17:45:23:026080 hawq_init:digoal:gpadmin-[INFO]:-Init hawq with args: ['init', 'segment']

Continue with HAWQ init Yy|Nn (default=N):
> y
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_master_address_host is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_master_address_port is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_master_directory is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_segment_directory is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_segment_address_port is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_dfs_url is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_master_temp_directory is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-Check: hawq_segment_temp_directory is set
20160903:17:45:24:026080 hawq_init:digoal:gpadmin-[INFO]:-No standby host configured, skip it
20160903:17:45:39:026080 hawq_init:digoal:gpadmin-[INFO]:-Segment init successfully

Processes

30846 gpadmin   20   0  804300   5308    696 S   0.0  0.1   0:00.05 postgres: port 40000, writer process            
30845 gpadmin   20   0  297824   5124    504 S   0.0  0.1   0:00.00 postgres: port 40000, stats collector process   
30848 gpadmin   20   0  808428   8376   1240 S   0.0  0.1   0:00.09 postgres: port 40000, segment resource manager  
30842 gpadmin   20   0  295572   5020    424 S   0.0  0.1   0:00.00 postgres: port 40000, logger process            
30847 gpadmin   20   0  804300   5092    484 S   0.0  0.1   0:00.00 postgres: port 40000, checkpoint process        
30332 gpadmin   20   0  838076   8716   4008 S   0.0  0.1   0:00.10 postgres: port  1921, writer process            
30331 gpadmin   20   0  297820   5132    508 S   0.0  0.1   0:00.00 postgres: port  1921, stats collector process   
30334 gpadmin   20   0  839084   6660   1292 S   0.0  0.1   0:00.00 postgres: port  1921, seqserver process         
30337 gpadmin   20   0  843124  10412   2468 S   0.0  0.1   0:00.43 postgres: port  1921, master resource manager   
30328 gpadmin   20   0  295568   5032    432 S   0.0  0.1   0:00.01 postgres: port  1921, master logger process     
30333 gpadmin   20   0  838212   6360   1640 S   0.0  0.1   0:00.01 postgres: port  1921, checkpoint process        
30335 gpadmin   20   0  838224   5260    592 S   0.0  0.1   0:00.01 postgres: port  1921, WAL Send Server process   
30336 gpadmin   20   0  838224   5280    612 S   0.0  0.1   0:00.00 postgres: port  1921, DFS Metadata Cache process
19869 gpadmin   20   0 2847072 259380  10768 S   0.0  3.2   1:24.49 /home/gpadmin/app/jdk1.8.0_102/bin/java -Dproc_secondarynamenode -Xmx1000m -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/home/gpadmin/app/hadoop-2.7.3/logs -Dhadoo+ 
18910 gpadmin   20   0 3497352 478808  10824 S   2.3  5.9  20:49.72 /home/gpadmin/app/jdk1.8.0_102/bin/java -Dproc_namenode -Xmx1000m -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/home/gpadmin/app/hadoop-2.7.3/logs -Dhadoop.log.fil+ 
19640 gpadmin   20   0 2940220 289324  10800 S   0.3  3.6   3:55.08 /home/gpadmin/app/jdk1.8.0_102/bin/java -Dproc_datanode -Xmx1000m -Djava.net.preferIPv4Stack=true -Dhadoop.log.dir=/home/gpadmin/app/hadoop-2.7.3/logs -Dhadoop.log.fil+ 
30841 gpadmin   20   0  804300 405684 401076 S   0.7  5.0   0:00.50 /home/gpadmin/app/hawq/bin/postgres -D /data01/gpadmin/pgdata_segment -p 40000 --silent-mode=true -M segment -i
30327 gpadmin   20   0  837944 492676 488064 S   0.3  6.1   0:00.54 /home/gpadmin/app/hawq/bin/postgres -D /data01/gpadmin/pgdata_master -p 1921 --silent-mode=true -M master -i

Test HAWQ

[gpadmin@digoal ~]$ psql template1
psql (8.2.15)
Type "help" for help.
template1=# \l
                 List of databases
   Name    |  Owner  | Encoding | Access privileges 
-----------+---------+----------+-------------------
 postgres  | gpadmin | UTF8     | 
 template0 | gpadmin | UTF8     | 
 template1 | gpadmin | UTF8     | 
(3 rows)

Compression ratio test

postgres=# create table test(id int, info text) with (appendonly=true,ORIENTATION=parquet) distributed by (id);
CREATE TABLE
postgres=# insert into test select generate_series(1,1000000),'test';
INSERT 0 1000000

(Drop the previous test table first, then recreate it with snappy compression; otherwise the second CREATE TABLE fails because the table already exists.)

postgres=# create table test(id int, info text) with (appendonly=true,ORIENTATION=parquet,COMPRESSTYPE=snappy) distributed by (id);
CREATE TABLE
postgres=# insert into test select generate_series(1,1000000),'test';
INSERT 0 1000000
postgres=# insert into test select * from test;
INSERT 0 1000000
postgres=# insert into test select * from test;
INSERT 0 2000000
postgres=# insert into test select * from test;
INSERT 0 4000000
postgres=# \timing
Timing is on.
postgres=# insert into test select * from test;
INSERT 0 8000000
Time: 8075.773 ms
postgres=# create table test1(id int, info text) with (appendonly=true,blocksize=2097152,COMPRESSTYPE=snappy) distributed by (id);
CREATE TABLE
Time: 84.373 ms
postgres=# insert into test1 select * from test;
INSERT 0 16000000
Time: 28059.969 ms
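
The actual compression ratio can be read straight off HDFS: look up each table's relfilenode in pg_class, then compare the sizes of the matching directories under /hawq_default (a sketch; the OID-named path components, 16385/16387 here, match the listing below and will differ on another installation):

postgres=# select relname, relfilenode from pg_class where relname in ('test','test1');
$ hdfs dfs -du -h /hawq_default/16385/16387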

The table files and directory structure can be inspected directly with hdfs commands

$ hdfs dfs -ls -R /
drwxr-xr-x   - gpadmin supergroup          0 2016-09-03 08:52 /hawq_default
drwx------   - gpadmin supergroup          0 2016-09-03 17:46 /hawq_default/16385
drwx------   - gpadmin supergroup          0 2016-09-03 17:54 /hawq_default/16385/16387
drwx------   - gpadmin supergroup          0 2016-09-03 17:52 /hawq_default/16385/16387/16538
-rw-------   3 gpadmin supergroup   70036636 2016-09-03 17:53 /hawq_default/16385/16387/16538/1
drwx------   - gpadmin supergroup          0 2016-09-03 17:54 /hawq_default/16385/16387/16557
-rw-------   3 gpadmin supergroup  130120496 2016-09-03 17:54 /hawq_default/16385/16387/16557/1
drwx------   - gpadmin supergroup          0 2016-09-03 17:54 /hawq_default/16385/16387/16562
-rw-------   3 gpadmin supergroup          0 2016-09-03 17:54 /hawq_default/16385/16387/16562/1
-rw-------   3 gpadmin supergroup          4 2016-09-03 17:46 /hawq_default/16385/16387/PG_VERSION

Tablespace management

postgres=# create table test1(id int primary key, info text) with (appendonly=false);
ERROR:  tablespace "dfs_default" does not support heap relation

postgres=# select * from pg_filesystem ;
-[ RECORD 1 ]------+--------------------------
fsysname           | hdfs
fsysconnfn         | gpfs_hdfs_connect
fsysdisconnfn      | gpfs_hdfs_disconnect
fsysopenfn         | gpfs_hdfs_openfile
fsysclosefn        | gpfs_hdfs_closefile
fsysseekfn         | gpfs_hdfs_seek
fsystellfn         | gpfs_hdfs_tell
fsysreadfn         | gpfs_hdfs_read
fsyswritefn        | gpfs_hdfs_write
fsysflushfn        | gpfs_hdfs_sync
fsysdeletefn       | gpfs_hdfs_delete
fsyschmodfn        | gpfs_hdfs_chmod
fsysmkdirfn        | gpfs_hdfs_createdirectory
fsystruncatefn     | gpfs_hdfs_truncate
fsysgetpathinfofn  | gpfs_hdfs_getpathinfo
fsysfreefileinfofn | gpfs_hdfs_freefileinfo
fsyslibfile        | $libdir/gpfshdfs.so
fsysowner          | 10
fsystrusted        | f
fsysacl            | 

postgres=# select * from pg_filespace;
   fsname   | fsowner | fsfsys | fsrep 
------------+---------+--------+-------
 pg_system  |      10 |      0 |     0
 dfs_system |      10 |  10895 |     0
(2 rows)

postgres=# select * from pg_filespace_entry ;
 fsefsoid | fsedbid |            fselocation             
----------+---------+------------------------------------
    16384 |       0 | hdfs://localhost:8020/hawq_default
(1 row)

postgres=# select * from pg_tablespace;
   spcname   | spcowner | spclocation | spcacl | spcprilocations | spcmirlocations | spcfsoid 
-------------+----------+-------------+--------+-----------------+-----------------+----------
 pg_default  |       10 |             |        |                 |                 |     3052
 pg_global   |       10 |             |        |                 |                 |     3052
 dfs_default |       10 |             |        |                 |                 |    16384
(3 rows)

HAWQ users can currently only use HDFS-based tablespaces; local tablespaces cannot be used.

postgres=# create table test1 (id int, info text) tablespace pg_default;
ERROR:  Creating table on tablespace 'pg_default' is not allowed

postgres=# create table test1 (id int, info text) tablespace pg_global;
ERROR:  permission denied for tablespace pg_global

postgres=# create tablespace tbs1 owner gpadmin filespace pg_system;
ERROR:  permission denied to create tablespace "tbs1"
HINT:  filespace pg_system is reserved for system use

Starting and stopping the cluster

Start

$ start-dfs.sh
$ hawq start cluster -a
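
After starting, the overall status can be checked with the management tool (assuming the hawq state subcommand is available in this build; it reports master and segment status):

$ hawq state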

Stop

$ hawq stop cluster -M fast -a  
$ stop-dfs.sh

How to delete the cluster

hawq stop cluster -M fast -a
hdfs dfs -rm -R /*
rm -rf /data01/gpadmin/pgdata_master
rm -rf /data01/gpadmin/pgdata_segment
rm -rf /data01/gpadmin/pgdata_master_tmp
rm -rf /data01/gpadmin/pgdata_segment_tmp

How to create a filespace and a tablespace

Generate the configuration file

hawq filespace -o /tmp

Enter a name for this filespace
> local
Enter replica num for filespace. If 0, default replica num is used (default=3)
> 1

Please specify the DFS location for the filespace (for example: localhost:9000/fs)
location> localhost:8020/localfs
20160903:21:57:13:031143 hawqfilespace:127.0.0.1:gpadmin-[INFO]:-[created]
20160903:21:57:13:031143 hawqfilespace:127.0.0.1:gpadmin-[INFO]:-
To add this filespace to the database please run the command:
   hawqfilespace --config /tmp/hawqfilespace_config_20160903_215642
   

Create the filespace

$ cat /tmp/hawqfilespace_config_20160903_215642 
filespace:local
fsreplica:1
dfs_url::localhost:8020/localfs

$ hawq filespace --config /tmp/hawqfilespace_config_20160903_215642
Reading Configuration file: '/tmp/hawqfilespace_config_20160903_215642'

CREATE FILESPACE local ON hdfs 
('localhost:8020/localfs/local') WITH (NUMREPLICA = 1);
20160903:21:57:36:031180 hawqfilespace:127.0.0.1:gpadmin-[INFO]:-Connecting to database
20160903:21:57:36:031180 hawqfilespace:127.0.0.1:gpadmin-[INFO]:-Filespace "local" successfully created

$ psql
psql (8.2.15)
Type "help" for help.

postgres=# select * from pg_filespace;
   fsname   | fsowner | fsfsys | fsrep 
------------+---------+--------+-------
 pg_system  |      10 |      0 |     0
 dfs_system |      10 |  10895 |     0
 local      |      10 |  10895 |     1
(3 rows)

Create the tablespace

postgres=# create tablespace local filespace local;
CREATE TABLESPACE

postgres=# \db
          List of tablespaces
    Name     |  Owner  | Filespace Name 
-------------+---------+---------------
 dfs_default | gpadmin | dfs_system
 local       | gpadmin | local
 pg_default  | gpadmin | pg_system
 pg_global   | gpadmin | pg_system
(4 rows)

Use the tablespace

postgres=# create table tbl (id int, info text) tablespace local;
CREATE TABLE
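
Data can be written and read as usual (a minimal check, reusing the generate_series pattern from earlier):

postgres=# insert into tbl select generate_series(1,1000), 'test';
INSERT 0 1000
postgres=# select count(*) from tbl;
 count 
-------
  1000
(1 row)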

Miscellaneous

HAWQ does not currently support creating indexes

postgres=# create index idx on tbl(id);
ERROR:  Cannot support create index statement yet

Heap (read/write-in-place) tables are not supported at the moment; only append-only (AO) tables are

postgres=# create table tbl1 (id int, info text) with(appendonly=false);
ERROR:  tablespace "dfs_default" does not support heap relation

postgres=# create table tbl1 (id int, info text) with(appendonly=false) tablespace local;
ERROR:  tablespace "local" does not support heap relation

Local tablespaces (non-HDFS storage) would support heap tables, but creating tables on them is not allowed.
postgres=# create table tbl1 (id int, info text) with(appendonly=false) tablespace pg_default;
ERROR:  Creating table on tablespace 'pg_default' is not allowed

postgres=# create table tbl1 (id int, info text) with(appendonly=false) tablespace pg_global;
ERROR:  permission denied for tablespace pg_global
