Spark 0.8 集群(CentOS6.4)-简单统计测试
|
$>cd ~/spark-0.8.0
$>bin/start-all.sh
|
|
$> jps
11055 Jps
2313 SecondaryNameNode
2409 JobTracker
2152 NameNode
4822 Master
|
|
// Set the master node of the Spark cluster and run spark-shell
$> MASTER=spark://centos01:7077 ./spark-shell
// read the json data
$>val file = sc.textFile("hdfs://sdc/user/hadoop/In/DATA*.json")
// filter the json data
$>val ips = file.filter(line => line.contains("ip_address"))
// Count all the lines that contain an IP address
$>ips.count()
// Count (and then collect) the IP lines that contain "241."
$>ips.filter(line => line.contains("241.")).count()
$>ips.filter(line => line.contains("241.")).collect()
|
