1、Check overall disk usage and free space
df -lh
2、Check file sizes with the hadoop command
hadoop fs -du -h /
3、Check overall HDFS usage
hadoop dfsadmin -report
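On Hadoop 2.x the hadoop dfsadmin form still works but is flagged as deprecated; the equivalent is:
hdfs dfsadmin -report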
4、Find where a file lives
locate <filename>
5、List files sorted by modification time
ll -rt
6、Check whether Tomcat is running
ps -ef | grep tomcat
7、Check which process is using a port
lsof -i:8080
netstat -tunpl | grep 8081
8、Compress and decompress
//Pack the files in the current directory whose names start with mms_gateway_010.log.2012_03_1 or mms_gateway_010.log.2012_03_2 into 20120511.tar.gz
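A minimal sketch of the packing command described in the comment above (file-name patterns taken from that comment):
tar -czvf 20120511.tar.gz mms_gateway_010.log.2012_03_1* mms_gateway_010.log.2012_03_2*
//unpack with: tar -xzvf 20120511.tar.gz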
9、Check whether a file contains special characters (transferring a file can silently add them)
cat -A <filename>, e.g. cat -A lg.txt
10、Load a local file into a Hive partitioned table
LOAD DATA LOCAL INPATH '/home/hadoop/hfxdoc/classmem_MrXu.txt' INTO TABLE classmem partition (teacher = 'Mis.Xu')
11、HBase shell commands
create 'FILE_UPLOAD_201506','cf'
disable 'FILE_UPLOAD_201506'
enable 'FILE_UPLOAD_201506'
put 'FILE_UPLOAD_201506','1.0.0.721201508071640279500','cf:cdr','9^Z_30^20150618104053_8903_127.0.0.1_9_Z_30_7651.zip^127.0.0.1^192.168.114.141^D:\middlewarework\cems_middware2\cems\temp\^0'
get 'FILE_UPLOAD_201506','1.0.0.721201508071640279500'
get 'FILE_UPLOAD_201506','1.0.0.721201508071640279500','cf'
get 'FILE_UPLOAD_201506','1.0.0.721201508071640279500','cf:cdr'
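A few more HBase shell commands that pair with the ones above (same example table; drop only works after disable):
list
scan 'FILE_UPLOAD_201506'
count 'FILE_UPLOAD_201506'
drop 'FILE_UPLOAD_201506'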
12、Difference between single quotes '', double quotes "" and backticks `` in shell scripts
name='lg';
echo "输出name参数值";
echo $name;
echo "输出强引用";
echo '$name'
echo "输出弱引用"
echo "$name";
echo "输出变量,{}增加变量边界辨识度"
echo "${name} is my name"
echo `expr index "$name" g`
echo "输出当前日期:"
echo `date`
echo -e "hello , your name is $name \n your second name is \"$name\" \n your three name is "$name" "
echo '${#name}'" 输出字符串长度 : ${#name}"
array_name=("12a" "23w")
echo "数组长度:${#array_name[@]}"
echo "数组第一个值:${array_name[0]}"
echo "数组第一个值长度:${#array_name[0]}"
read sname
echo "输入sname为:$sname"
echo "判断比较循环"
num1=100
num2=200
if test $num1 -ge $num2
then
echo 'num1 is greater than or equal to num2'
else
echo 'num1 is less than num2'
fi
for loop in 1 2 3 4
do
echo "the value is : $loop"
done
for str in "this is a strs"
do
echo "the value is : $str"
done
demoFun(){
echo "this is demoFun"
}
demoFun
13、Redirect with > , append-redirect with >>
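A quick illustration (run.log is just a placeholder file):
echo "first line" > run.log     # > truncates and overwrites run.log
echo "second line" >> run.log   # >> appends to the end of run.log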
14、sqoop commands
eval expression
sqoop eval --connect jdbc:oracle:thin:@172.16.126.130:1521:epadv --username DSJYY_ZDXW --password Sjzx314! --query "DELETE FROM STUDENT"
Export MapReduce output data to Oracle with sqoop
sqoop export -D mapreduce.job.queuename=ZH000001 --connect jdbc:oracle:thin:@172.16.126.130:1521:epadv --username DSJYY_ZDXW --password Sjzx314! --table GW_CLASS_GRP --export-dir /user/ZH000001/wfk_test/terminalclass --fields-terminated-by '\t'
Import data from a relational database into a Hive table
sqoop import --connect jdbc:mysql://localhost:3306/test --username dyh --password 000000
--table users --hive-import --hive-table users -m 2 --fields-terminated-by "\0001";
Workaround for auto-increment primary keys
sqoop export --connect jdbc:mysql://192.168.2.102:3306/vap-viewer --username root --password vrv123456. --table ml_pkilog --export-dir /ml/xiao11lang/pki_time_pair_tomysql --input-fields-terminated-by '\t'
MySQL to HDFS example
./sqoop import --connect jdbc:mysql://10.8.210.166:3306/recsys --username root --password root --table shop -m 1 --target-dir /user/recsys/input/shop/$today
Output data: ./hadoop fs -cat /user/recsys/input/shop/2013-05-07/*
Generated HDFS data:
287,516809,0,0,6,25,45.78692,126.65384
288,523944,0,0,29,6,120.26087,30.17264
HDFS to MySQL example
./sqoop export --connect jdbc:mysql://10.8.210.166:3306/recsys --username root --password root --table shopassoc --fields-terminated-by ',' --export-dir /user/recsys/output/shop/$today
Input data:
Tip: the exported data must match the table's field order; when converting from HDFS back to MySQL, the auto-increment primary key field can be left empty (null).
./hadoop fs -cat /user/recsys/output/shop/2013-05-07/*
Original HDFS data:
null,857207,729974,947.0818,29,2013-05-08 10:22:29
null,857207,524022,1154.2603,29,2013-05-08 10:22:29
Parameter notes for the examples
Scope  | Parameter            | Meaning
common | connect              | JDBC URL
common | username             | ---
common | password             | ---
common | table                | table name
import | target-dir           | output HDFS directory; defaults to /user/$loginName/
export | fields-terminated-by | field delimiter in the HDFS file; default is "\t"
export | export-dir           | path of the HDFS file
Splitting large imports/exports:
-m       | number of parallel tasks; default is 1 (no parallelism); with large data volumes the split is done automatically on the primary key ID
split-by | for tables without a primary key: specify the column to split on; pick a column whose values are widely distributed (automatic split)
where    | alternatively, run the job several times by hand and split manually with a where condition
Parameter         | Meaning
job               | scheduled jobs; personally I don't see much point, I trust crontab more
eval              | runs SQL remotely for ad-hoc operations, but testing showed it does not support DELETE
create-hive-table | copies data from a data source into Hive
15、Run a jar file with hadoop
hadoop jar /opt/hadoop-2.6.0/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0.jar wordcount /tmp/lg/test.txt /user/hadoop/examples/lgtest/map-reduce4
16、Create a Hive table
CREATE TABLE `lg1`(
`key` string,
`value` string)
row format delimited
fields terminated by '\t'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
17、Transfer data between Hadoop clusters
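No command was recorded here; a minimal sketch using distcp, with cluster addresses and paths as placeholders:
hadoop distcp hdfs://namenode-a:8020/user/hadoop/src hdfs://namenode-b:8020/user/hadoop/dst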
18、scp copy commands
scp -r /home/hadoop/.ssh hadoop-node2:/home/hadoop
scp root@10.27.89.8:/home/lg/software/eclipse.tar.gz /home/lg/software/eclipse.tar.gz
scp ./oozie-4.2.0.tar.gz hadoop@192.168.2.106:/opt/
19、Count the jar files under a directory
find . -name "*.jar" | wc -l
20、Force a Hadoop NameNode into standby
$HADOOP_HOME/bin/hdfs haadmin -transitionToStandby --forcemanual namenode102
21、Linux scheduled tasks (cron)
Time synchronization on the Hadoop cluster
1 * * * * root ntpdate 192.168.2.102 && hwclock -w
1. Run crontab -e to edit the crontab file
For example, with contents like:
*/2 * * * * /bin/sh /home/admin/jiaoben/buy/deleteFile.sh
Save the file and exit.
*/2 * * * * /bin/sh /home/admin/jiaoben/buy/deleteFile.sh
*/2 * * * * — these five fields set when the script runs (here: every 2 minutes)
/bin/sh /home/admin/jiaoben/buy/deleteFile.sh — this part is what to execute: /bin/sh is the command that runs the script, and the rest is the path where the script lives
2. Check that the crontab entry for this user was created successfully with crontab -l
SHELL=/bin/bash
PATH=/sbin:/bin:/usr/sbin:/usr/bin
MAILTO=root
HOME=/
# For details see man 4 crontabs
# Example of job definition:
# .---------------- minute (0 - 59)
# | .------------- hour (0 - 23)
# | | .---------- day of month (1 - 31)
# | | | .------- month (1 - 12) OR jan,feb,mar,apr ...
# | | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
# | | | | |
# * * * * * user-name command to be executed
1 * * * * root ntpdate 192.168.2.102 && hwclock -w
22、Cluster startup summary
Check the time
[root@hadoop01 ~]# history | grep date
127 date
128 date -s "-8hours"
133 ntpdate 192.168.2.200 && hwclock -w
210 date
211 history -s | ge\rep date
212 history -s | grep date
213 history | grep date
date -s "-8hours"
Sync the BIOS time:
Sync the BIOS clock by forcing the system time to be written to CMOS:
#clock -w
On 102, check as the root account:
service ntpd status
service ntpd start
As root, sync the time over NTP (after about 10 minutes): ntpdate 192.168.2.102
Scheduled synchronization
[hadoop@bx101 bin]$ cat /etc/crontab
SHELL=/bin/bash
PATH=/sbin:/bin:/usr/sbin:/usr/bin
MAILTO=root
HOME=/
# For details see man 4 crontabs
# Example of job definition:
# .---------------- minute (0 - 59)
# | .------------- hour (0 - 23)
# | | .---------- day of month (1 - 31)
# | | | .------- month (1 - 12) OR jan,feb,mar,apr ...
# | | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
# | | | | |
# * * * * * user-name command to be executed
1 * * * * root ntpdate 192.168.2.102 && hwclock -w
Start MySQL
As root, check the mysqld service: service mysqld status
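If it is not running, start it with the same service mechanism used elsewhere in these notes:
service mysqld start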
Start ZooKeeper
./zkServer.sh start
./zkServer.sh status
Start Hadoop
./sbin/start-all.sh
./sbin/start-dfs.sh
Web UIs
http://192.168.2.101:8088/cluster
http://192.168.2.101:50070
To do below: HBase startup and web pages; Tomcat CATALINA_HOME configuration; Hive metastore / Thrift check
Tomcat changes
Change the three ports in server.xml
Edit catalina.sh:
CATALINA_HOME=/opt/tomcat7.0.62
BASEDIR=/opt/tomcat7.0.62
Start Hive
hive --service metastore &
[hadoop@bx101 ~]$ ps -ef | grep metastore
hadoop 10468 1 0 Oct20 ? 01:31:48 /usr/local/jdk/bin/java -Xmx256m -Djava.rmi.server.hostname=192.168.2.101
-Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dhadoop.
[hadoop@bx101 ~]$ netstat -tunpl | grep 10468
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
tcp 0 0 0.0.0.0:9083 0.0.0.0:* LISTEN 10468/java
hive --service hiveserver -p 10001 &
[hadoop@bx101 ~]$ netstat -tunpl | grep 10001
(Not all processes could be identified, non-owned process info
will not be shown, you would have to be root to see it all.)
tcp 0 0 192.168.2.101:10001 0.0.0.0:* LISTEN 14725/java
[hadoop@bx101 ~]$ ps -ef | grep 14725
hadoop 14725 1 0 Nov30 ? 00:25:49 /usr/local/jdk/bin/java -Xmx256m -Djava.rmi.server.hostname=192.168.2.101 -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Dhadoop.log.dir=/usr/local/hadoop/logs/dfslogs -Dhadoop.log.file=hadoop.log -Dhadoop.home.dir=/usr/local/hadoop -Dhadoop.id.str=hadoop -Dhadoop.root.logger=INFO,console -Djava.library.path=/usr/local/hadoop/lib/native -Dhadoop.policy.file=hadoop-policy.xml -Djava.net.preferIPv4Stack=true -Xmx512m -Dhadoop.security.logger=INFO,NullAppender org.apache.hadoop.util.RunJar /opt/hive-0.13.1/lib/hive-service-0.13.1.jar org.apache.hive.service.server.HiveServer2 --hiveconf hive.server2.thrift.port=10001
hadoop 29587 29161 0 09:25 pts/1 00:00:00 grep 14725
Start HBase
On 101, run ./bin/start-hbase.sh to start the HBase cluster
Start the shell client: ./hbase shell
Stop HBase: ./stop-hbase.sh
1) HDFS web UI
Open http://{hostname}:50070/dfshealth.jsp for the HDFS home page, click the "Browse the filesystem" link and select the hbase directory to see the /hbase directory structure that HBase creates on HDFS for its data.
2) Master page: http://bx101:60010/master-status
The address http://{hostname}:60010/master.jsp shows HBase status information.
Start Spark
## Run on the Master node
# cd /usr/local/spark && ./sbin/start-all.sh
./spark-shell --master spark://bx102:7077
Start Spark in these modes:
spark-sql --master spark://bx102:7077
spark-shell --master yarn-client
//Start Oozie
[hadoop@bx102/oozie-4.2.0]
bin/oozied.sh start
Log information
hadoop fs -ls /logs/yarn/hadoop/logs
hadoop fs -cat /logs/yarn/hadoop/logs/application_1436756289760_0317/bx103_36165
$OOZIE_HOME/bin/oozie job -oozie http://localhost:11000/oozie -log 0000110-150625161116868-oozie-hado-W
$ sbin/mr-jobhistory-daemon.sh start historyserver
(or, from inside the sbin directory: ./mr-jobhistory-daemon.sh start historyserver)
Start the httpfs service
httpfs.sh start
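A quick sanity check that HttpFS is up (default port 14000; the hostname and user.name here are placeholders):
curl -i "http://hadoop01:14000/webhdfs/v1/?op=LISTSTATUS&user.name=hadoop"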
[hadoop@hadoop01 bin]$ vi catalina.sh
#
# CATALINA_PID (Optional) Path of the file which should contains the pid
# of catalina startup java process, when start (fork) is used
#
# LOGGING_CONFIG (Optional) Override Tomcat's logging config file
# Example (all one line)
# LOGGING_CONFIG="-Djava.util.logging.config.file=$CATALINA_BASE/conf/logging.properties"
#
# LOGGING_MANAGER (Optional) Override Tomcat's logging manager
# Example (all one line)
# LOGGING_MANAGER="-Djava.util.logging.manager=org.apache.juli.ClassLoaderLogManager"
# -----------------------------------------------------------------------------
# OS specific support. $var _must_ be set to either true or false.
CATALINA_HOME=/opt/hadoop/share/hadoop/httpfs/tomcat
cygwin=false
os400=false
darwin=false
Start Drill
[hadoop@bx101 bin]$ ./drillbit-all.sh start
[hadoop@bx101 bin]$ pwd
/opt/drill/bin
[hadoop@bx101 bin]$ ll
total 68
-rw-rw-r--. 1 hadoop hadoop 689 Oct 19 16:05 derby.log
-rwxrwxr-x. 1 hadoop hadoop 192 Dec 18 09:34 drillbit-all.sh
-rwxrwxr-x. 1 hadoop hadoop 6581 Jul 2 2015 drillbit.sh
-rwxrwxr-x. 1 hadoop hadoop 978 Jul 2 2015 drill-conf
-rwxrwxr-x. 1 hadoop hadoop 6342 Jul 2 2015 drill-config.sh
-rwxrwxr-x. 1 hadoop hadoop 964 Jul 2 2015 drill-embedded
-rwxrwxr-x. 1 hadoop hadoop 988 Jul 2 2015 drill-localhost
-rwxrwxr-x. 1 hadoop hadoop 1078 Jul 2 2015 dumpcat
-rw-rw-r--. 1 hadoop hadoop 820 Oct 14 10:03 export.hql
-rwxrwxr-x. 1 hadoop hadoop 104 Jul 2 2015 hadoop-excludes.txt
-rwxrwxr-x. 1 hadoop hadoop 1114 Jul 2 2015 runbit
-rwxrwxr-x. 1 hadoop hadoop 2288 Jul 2 2015 sqlline
-rwxrwxr-x. 1 hadoop hadoop 6006 Jul 2 2015 sqlline.bat
-rwxrwxr-x. 1 hadoop hadoop 1139 Jul 2 2015 submit_plan
netstat -an | grep 9800
23、Start daemons individually in the background, e.g. the NameNode
hadoop-daemon.sh start namenode
You will find that hadoop-daemon.sh is used to start the namenode and jobtracker, while hadoop-daemons.sh is used to start the datanodes, secondarynamenode and tasktrackers. In other words, hadoop-daemon.sh starts processes on the master, and hadoop-daemons.sh starts processes on the slave hosts and on the secondarynamenode host.
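A couple of companion invocations (same scripts, standard Hadoop arguments; the host list comes from the slaves file in the Hadoop configuration directory):
hadoop-daemons.sh start datanode    # starts a DataNode on every host listed in slaves
hadoop-daemon.sh stop namenode      # the same script with stop shuts a daemon down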
24、Kill a running Hadoop job
hadoop job -kill [job-id]
25、Put the cluster into safe mode
$ bin/hadoop dfsadmin -safemode enter
26、List the DataNodes
$ bin/hadoop dfsadmin -report
27、Decommission the DataNode datanodename
$ bin/hadoop dfsadmin -decommission datanodename
28、Explicitly put HDFS into or out of safe mode
$ bin/hadoop dfsadmin -safemode <enter|leave|get|wait>
List jobs: hadoop job -list [all]  (-list all shows all jobs; -list alone shows only jobs yet to complete)
Command reference: http://blog.csdn.net/huoyunshen88/article/details/9055973
29、Check whether a file contains special characters: cat -A <filename>
30、Get node / task status
Total Nodes:2
Node-Id Node-State Node-Http-Address Number-of-Running-Containers
hadoop04:57533 RUNNING hadoop04:8042 3
hadoop05:57357 RUNNING hadoop05:8042 2
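The listing above matches the format of the YARN node list command, so it was presumably produced with:
yarn node -list
yarn application -list shows the running applications in a similar tabular form.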
31、Create a symlink: ln -s /opt/hadoop-2.6.0 /usr/local/hadoop  (ln -s <target> <link name>)
32、Preparation for deploying the vap environment
(1) Time sync: ntpdate 192.168.2.102
(2) Passwordless SSH is best set up in advance
(3) Give the hadoop user ownership of /opt: chown -R hadoop:hadoop /opt
(4) All installation must be done as the hadoop user, never as root
(5) Sqoop environment variables go in /etc/profile.d/vrvbigdata.sh ([hadoop@bx105 local]$ vim /etc/profile.d/vrvbigdata.sh):
#sqoop
export SQOOP_HOME=/opt/sqoop
export PATH=$SQOOP_HOME/bin:$PATH
export SQOOP_CONF_DIR=$SQOOP_HOME/conf
export LOGDIR=$SQOOP_HOME/logs
(6) Copy commands
scp -r hadoop-2.6.0 192.168.115.103:/opt/
scp -r tomcat7.0.62 192.168.115.103:/opt/
scp -r sqoop 192.168.115.103:/opt/
scp -r tomcat 192.168.115.103:/opt/
scp -r oozie-4.2.0 192.168.115.103:/opt/
(7) If your system environment variables set export HADOOP_PREFIX=/usr/local/hadoop, create the symlink yourself:
ln -s /opt/hadoop-2.6.0 /usr/local/hadoop  (ln -s <target> <link name>)
(8) Update the Tomcat environment variable: export CATALINA_HOME=/opt/tomcat
33、Run a script in the background: nohup ****.sh < /dev/null &
bin/storm nimbus < /dev/null 2>&1 &
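The other Storm daemons are usually started with the same background pattern:
bin/storm supervisor < /dev/null 2>&1 &
bin/storm ui < /dev/null 2>&1 &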