Hadoop HDFS API Operations

Date: 2021-07-28 02:46:49

Tags: big data, hadoop

Operations with the HDFS API.

## 1. HDFS API operations

### 1.1 Reading file content via java.net.URL

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.junit.Test;

public class TestHdfs {

    @Test
    public void readFile() throws IOException {
        // Teach java.net.URL the hdfs:// scheme. This factory may be set at most once per JVM.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
        URL url = new URL("hdfs://172.17.100.11:8020/input/file1");
        URLConnection conn = url.openConnection();
        InputStream is = conn.getInputStream();
        // available() is only an estimate of the readable bytes; it works for this
        // small demo file, but the read loop in 1.2 is the robust way to drain a stream.
        byte[] buf = new byte[is.available()];
        is.read(buf);
        is.close();
        String str = new String(buf);
        System.out.println(str);
    }
}
```
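Because `URL.setURLStreamHandlerFactory` can only be called once per JVM, code outside a one-off test usually registers the factory in a static initializer and streams straight to stdout. A minimal sketch under that assumption (the class name `UrlCat` is mine):

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;

import org.apache.hadoop.fs.FsUrlStreamHandlerFactory;
import org.apache.hadoop.io.IOUtils;

public class UrlCat {

    static {
        // Registered once for the whole JVM; a second call anywhere would throw an Error.
        URL.setURLStreamHandlerFactory(new FsUrlStreamHandlerFactory());
    }

    public static void main(String[] args) throws IOException {
        InputStream in = null;
        try {
            in = new URL("hdfs://172.17.100.11:8020/input/file1").openStream();
            // Copy to stdout in 4 KB chunks; 'false' leaves System.out open.
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
        }
    }
}
```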


### 1.2 Reading HDFS data with the FileSystem API

```java
package it.xuegod.hadoop.hdfs;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

/**
 * @author zhangyy
 *
 * Read a file through the FileSystem API.
 */
public class readFileApi {

    @Test
    public void readFileByApi() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);

        Path p = new Path("/input/file1");
        FSDataInputStream fis = fs.open(p);
        byte[] buf = new byte[1024];
        int len = -1;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Drain the stream in 1 KB chunks until EOF.
        while ((len = fis.read(buf)) != -1) {
            baos.write(buf, 0, len);
        }
        fis.close();
        baos.close();
        System.out.println(new String(baos.toByteArray()));
    }
}
```
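`fs.open` returns an `FSDataInputStream`, which is seekable, so the same stream can be rewound and read again; a plain `java.io` stream cannot do this. A small sketch of that (the class name `seekFileApi` is mine):

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class seekFileApi {

    @Test
    public void seekFileByApi() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);

        FSDataInputStream fis = fs.open(new Path("/input/file1"));
        byte[] buf = new byte[1024];
        int len;
        // First pass: print the whole file.
        while ((len = fis.read(buf)) != -1) {
            System.out.write(buf, 0, len);
        }
        // Rewind to offset 0 and print it a second time.
        fis.seek(0);
        while ((len = fis.read(buf)) != -1) {
            System.out.write(buf, 0, len);
        }
        fis.close();
    }
}
```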


The read loop above can also be delegated to Hadoop's `IOUtils.copyBytes`, which handles the buffering internally:

```java
package it.xuegod.hadoop.hdfs;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

/**
 * @author zhangyy
 *
 * Read a file with IOUtils.copyBytes.
 */
public class readFileApi2 {

    @Test
    public void readFileByApi() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);

        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        Path p = new Path("/input/file1");
        FSDataInputStream fis = fs.open(p);
        // Copy the whole stream into the buffer, 1 KB at a time.
        IOUtils.copyBytes(fis, baos, 1024);
        fis.close();
        System.out.println(new String(baos.toByteArray()));
    }
}
```
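`copyBytes` also has a four-argument overload that controls whether the streams are closed when the copy finishes, which pairs naturally with try/finally. A sketch of the same read using it (the class name `readFileApi3` is mine):

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.Test;

public class readFileApi3 {

    @Test
    public void readFileByApi() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);

        FSDataInputStream fis = null;
        try {
            fis = fs.open(new Path("/input/file1"));
            // 'false' means copyBytes will not close System.out when it finishes.
            IOUtils.copyBytes(fis, System.out, 1024, false);
        } finally {
            IOUtils.closeStream(fis);
        }
    }
}
```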


### 1.3 Creating a directory on HDFS

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

/**
 * @author zhangyy
 *
 * Create a directory.
 */
public class mkdirFileApi {

    @Test
    public void mkdirFileByApi() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);
        // Creates /myhadoop and any missing parents, like `hdfs dfs -mkdir -p`.
        fs.mkdirs(new Path("/myhadoop"));
    }
}
```
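`mkdirs` returns a boolean, and `fs.exists` can check for the directory before creating it. A small sketch combining the two (the class name `mkdirCheckApi` is mine):

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class mkdirCheckApi {

    @Test
    public void mkdirAndCheck() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/myhadoop");
        if (fs.exists(dir)) {
            System.out.println(dir + " already exists");
        } else {
            // mkdirs returns true on success.
            System.out.println("created: " + fs.mkdirs(dir));
        }
    }
}
```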



### 1.4 Writing a file (put)

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

/**
 * @author zhangyy
 *
 * Write a file.
 */
public class putFileApi {

    @Test
    public void putFileByApi() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);
        // create() opens an output stream, overwriting /myhadoop/a.txt if it exists.
        FSDataOutputStream out = fs.create(new Path("/myhadoop/a.txt"));
        out.write("helloword".getBytes());
        out.close();
    }
}
```
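To upload an existing local file instead of writing bytes by hand, `FileSystem` also offers `copyFromLocalFile`, the programmatic counterpart of `hdfs dfs -put`. A sketch, assuming a local file /tmp/a.txt exists (both the class name and the local path are mine):

```java
package it.xuegod.hadoop.hdfs;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Test;

public class copyFromLocalApi {

    @Test
    public void putLocalFile() throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://172.17.100.11:8020");
        FileSystem fs = FileSystem.get(conf);

        // Copies the local file into the directory created in 1.3.
        fs.copyFromLocalFile(new Path("/tmp/a.txt"), new Path("/myhadoop/a.txt"));
    }
}
```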