UncompressFactory.java 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. package com.yiidata.parcel.utils;
  2. import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
  3. import org.apache.commons.io.IOUtils;
  4. import org.apache.commons.lang.StringUtils;
  5. import org.apache.tools.tar.TarEntry;
  6. import org.apache.tools.tar.TarInputStream;
  7. import org.slf4j.Logger;
  8. import org.slf4j.LoggerFactory;
  9. import java.io.BufferedInputStream;
  10. import java.io.IOException;
  11. import java.io.InputStream;
  12. import java.io.OutputStream;
  13. import java.nio.file.FileSystem;
  14. import java.nio.file.Files;
  15. import java.nio.file.Path;
  16. import java.nio.file.Paths;
  17. import java.nio.file.StandardOpenOption;
  18. import java.util.ArrayList;
  19. import java.util.List;
  20. import java.util.zip.ZipEntry;
  21. import java.util.zip.ZipInputStream;
  22. /**
  23. * Created by sssd on 2017/8/18.
  24. */
  25. public abstract class UncompressFactory {
  26. protected static Logger logger = LoggerFactory.getLogger(UncompressFactory.class);
  27. private UncompressFactory() {
  28. }
  29. /**
  30. * 定义解压抽象方法
  31. * @param hdfsinput hdfs 输入路径
  32. * @param fs hadaoop confit
  33. * @param uncompressPath 解压后输出路径
  34. * @return
  35. */
  36. public abstract List<String> uncompress(Path hdfsinput, FileSystem fs, Path uncompressPath);
  37. /**
  38. * 根据文件扩展名,返回对应的解压方法。
  39. * @param sub
  40. * @return
  41. */
  42. public static UncompressFactory getUncompressFactory(String sub) {
  43. if("zip".equals(sub)) {
  44. return new ZipUncompress();
  45. } else if("tar".equals(sub)) {
  46. return new TarUncompress();
  47. } else if("gzip".equals(sub)) {
  48. return new TgzUncompress();
  49. } else if("tgz".equals(sub)) {
  50. return new TgzUncompress();
  51. } else if("gz".equals(sub)) {
  52. return new TgzUncompress();
  53. }
  54. return null;
  55. }
  56. /**
  57. * .zip解压
  58. */
  59. static class ZipUncompress extends UncompressFactory {
  60. @Override
  61. public List<String> uncompress(Path hdfsinput, FileSystem fs, Path uncompressPath) {
  62. try {
  63. return uncompressZip(fs, hdfsinput, uncompressPath); // hadoop, hdfs输入路径, 解压保存路径
  64. } catch (IOException e) {
  65. throw new IllegalStateException(e);
  66. }
  67. }
  68. }
  69. /**
  70. * .tar 解压
  71. */
  72. static class TarUncompress extends UncompressFactory {
  73. @Override
  74. public List<String> uncompress(Path hdfsinput, FileSystem fs, Path uncompressPath) {
  75. try {
  76. return uncompressTar(fs, hdfsinput, uncompressPath); // hadoop, hdfs输入路径, 解压保存路径
  77. } catch (IOException e) {
  78. throw new IllegalStateException(e);
  79. }
  80. }
  81. }
  82. /**
  83. * .tgz 解压
  84. */
  85. static class TgzUncompress extends UncompressFactory {
  86. @Override
  87. public List<String> uncompress(Path hdfsinput, FileSystem fs, Path uncompressPath) {
  88. try {
  89. Path temp = uncompressTgz(fs, hdfsinput);
  90. return uncompressTar(fs, temp, uncompressPath); // hadoop, hdfs输入路径, 解压保存路径
  91. } catch (IOException e) {
  92. throw new IllegalStateException(e);
  93. }
  94. }
  95. }
  96. /**
  97. * zip解压
  98. * @param fs
  99. * @param input
  100. * @param uncompressPath
  101. * @return
  102. * @throws IOException
  103. */
  104. public static List<String> uncompressZip(FileSystem fs, Path input, Path uncompressPath) throws IOException {
  105. OutputStream out = null;
  106. List<String> paths = new ArrayList<String>();
  107. ZipInputStream zipInputStream = null;
  108. try {
  109. if (!Files.exists(input)) {
  110. throw new IllegalArgumentException(input.toString() + " does not exist");
  111. }
  112. zipInputStream = new ZipInputStream(Files.newInputStream(input));
  113. ZipEntry zipEntry = null;
  114. Path path = null;
  115. while ((zipEntry = zipInputStream.getNextEntry()) != null ) {
  116. String entryName = zipEntry.getName();
  117. if (zipEntry.isDirectory()) {
  118. // 如果是文件夹,创建文件夹并加速循环
  119. path = Paths.get(uncompressPath.toString(), entryName);
  120. Files.createDirectories(path);
  121. continue;
  122. }
  123. path = Paths.get(uncompressPath.toString(), entryName);
  124. out = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW);
  125. IOUtils.copy(zipInputStream, out);
  126. out.flush();
  127. zipInputStream.closeEntry();
  128. paths.add(path.toString());
  129. logger.info("解压文件: {} 成功!", entryName);
  130. }
  131. } catch (Exception e) {
  132. e.printStackTrace();
  133. } finally {
  134. if(out != null){
  135. try {
  136. out.close();
  137. } catch (IOException e) {
  138. e.printStackTrace();
  139. }
  140. }
  141. if(zipInputStream != null){
  142. try {
  143. zipInputStream.close();
  144. } catch (IOException e) {
  145. e.printStackTrace();
  146. }
  147. }
  148. }
  149. return paths;
  150. }
  151. /**
  152. * .Tar 解压
  153. * @param fs
  154. * @param input
  155. * @param uncompressPath
  156. * @return
  157. * @throws IOException
  158. */
  159. public static List<String> uncompressTar(FileSystem fs, Path input, Path uncompressPath) throws IOException {
  160. OutputStream out = null;
  161. List<String> paths = new ArrayList<String>();
  162. TarInputStream tarInputStream = null;
  163. if (!Files.exists(input)) {
  164. throw new IllegalArgumentException(input.toString() + " does not exist");
  165. }
  166. try {
  167. tarInputStream = new TarInputStream(Files.newInputStream(input));
  168. TarEntry entry = null;
  169. Path path = null;
  170. while ( ( entry = tarInputStream.getNextEntry()) != null ){
  171. String entryName = entry.getName();
  172. if(entry.isDirectory()){
  173. path = Paths.get(uncompressPath.toString(), entryName);
  174. Files.createDirectories(path);
  175. continue;
  176. }
  177. path = Paths.get(uncompressPath.toString(), entryName);
  178. out = Files.newOutputStream(path, StandardOpenOption.CREATE_NEW);
  179. IOUtils.copy(tarInputStream, out);
  180. out.flush();
  181. paths.add(path.toString());
  182. logger.info("解压文件: {} 成功!", entryName);
  183. }
  184. } catch (IOException e) {
  185. e.printStackTrace();
  186. } finally {
  187. if ( out != null){
  188. try {
  189. out.close();
  190. } catch (IOException e) {
  191. e.printStackTrace();
  192. }
  193. }
  194. if(tarInputStream != null){
  195. try {
  196. tarInputStream.close();
  197. } catch (IOException e) {
  198. e.printStackTrace();
  199. }
  200. }
  201. }
  202. return paths;
  203. }
  204. /**
  205. * .tgz 解压
  206. * @param fs
  207. * @param input
  208. * @return
  209. * @throws IOException
  210. */
  211. public static Path uncompressTgz(FileSystem fs, Path input) throws IOException {
  212. int buffersize = 2048;
  213. OutputStream out = null;
  214. GzipCompressorInputStream gzin = null;
  215. InputStream hdfsinput = null;
  216. String temppath = null;
  217. Path outPath = null;
  218. if (!Files.exists(input)) {
  219. throw new IllegalArgumentException(input.toString() + " does not exist");
  220. }
  221. int i = input.toString().lastIndexOf("/");
  222. if ( i > 0){
  223. temppath = StringUtils.trimToNull(input.toString().substring(0,i));
  224. }
  225. try {
  226. hdfsinput = Files.newInputStream(input);
  227. BufferedInputStream in = new BufferedInputStream(hdfsinput);
  228. outPath = Paths.get(temppath, "tmp-" + System.currentTimeMillis() + ".tar");
  229. if( Files.exists(outPath)){
  230. Files.delete(outPath);
  231. }
  232. out = Files.newOutputStream(outPath);
  233. gzin = new GzipCompressorInputStream(in);
  234. final byte[] buffer = new byte[buffersize];
  235. int n = 0;
  236. while (-1 != (n = gzin.read(buffer))) {
  237. out.write(buffer, 0, n);
  238. }
  239. logger.info("临时文件的保存路径为:" + outPath.toString());
  240. } catch (IOException e) {
  241. e.printStackTrace();
  242. } finally {
  243. if(hdfsinput != null){
  244. try {
  245. hdfsinput.close();
  246. } catch (IOException e) {
  247. e.printStackTrace();
  248. }
  249. }
  250. if(out!= null){
  251. try {
  252. out.close();
  253. } catch (IOException e) {
  254. e.printStackTrace();
  255. }
  256. }
  257. }
  258. return outPath;
  259. }
  260. }