kafka消费者实时消费数据存入hdfs java scalca 代码

时间:2023-12-30 23:42:20

hadoop-client依赖很乱 调试很多次cdh版本好多jar没有 用hadoop2.7.3可以

   自定义输出流的池子进行流管理
public void writeLog2HDFS(String path, byte[] log) {
try {
//得到我们的装饰流
FSDataOutputStream out = HDFSOutputStreamPool.getInstance().takeOutputStream(path);
out.write(log);
out.write("\r\n".getBytes());
out.hsync();
out.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
/**
* @created by imp ON 2019/3/1
*/
object KafkaScalaConsumer { val write=new HDFSWriter() def ZK_CONN = "192.168.121.12:2181"
def GROUP_ID = "1test-consumer-group109"
def TOPIC = "eshop" def main(args: Array[String]): Unit = {
//println(" 开始了 ") val connector = Consumer.create(createConfig()) val topicCountMap = new HashMap[String, Int]()
topicCountMap.put(TOPIC, 3) // TOPIC在创建时就指定了它有3个partition val msgStreams: Map[String, List[KafkaStream[Array[Byte], Array[Byte]]]] = connector.createMessageStreams(topicCountMap) println("# of streams is " + msgStreams.get(TOPIC).get.size) val threadPool:ExecutorService=Executors.newFixedThreadPool(3) var index = 0;
for (stream <- msgStreams.get(TOPIC).get) {
threadPool.execute(new ThreadDemo("consumer_"+index,stream))
index+=1;
}
} class ThreadDemo(threadName:String,stream:KafkaStream[Array[Byte], Array[Byte]]) extends Runnable{
override def run(): Unit = { val it: ConsumerIterator[Array[Byte], Array[Byte]] = stream.iterator(); while(it.hasNext()){
val data : MessageAndMetadata[Array[Byte], Array[Byte]] = it.next()
val msg=data.message()
val log = new String(msg)
val arr = StringUtil.splitLog(log)
if (arr == null || arr.length < 1) return //todo: continue is not supported
//主机名
val hostname = StringUtil.getHostname(arr)
//日期串
val dateStr = StringUtil.formatYyyyMmDdHhMi(arr)
//path
val rawPath = "/spark/eshop/" + dateStr + "/" + hostname + ".log" //写入数据到hdfs
System.out.println(log)
write .writeLog2HDFS(rawPath, msg)
}
}
} def createConfig(): ConsumerConfig = {
val props = new Properties()
props.put("zookeeper.connect", ZK_CONN)
// props.put("bootstrap.servers","localhost:9092")
props.put("group.id", GROUP_ID)
props.put("zookeeper.session.timeout.ms", "")
props.put("zookeeper.connection.timeout.ms","")
props.put("auto.offset.reset", "smallest")
props.put("auto.commit.interval.ms", "")
props.put("rebalance.backoff.ms","")
props.put("rebalance.max.retries","")
props.put("auto.offset.reset", "smallest")
new ConsumerConfig(props)
}
}