# hdfs_test.py
#!/usr/bin/env python
# encoding: utf-8
"""
Operating HDFS from Python.
"""
import sys
from datetime import datetime
from datetime import date
from datetime import timedelta
from hdfs import Config
from json import dump, load
from hdfs.client import Client
  13. #返回目录下的文件
  14. def list(client, hdfs_path):
  15. return client.list(hdfs_path, status=False)
  16. if __name__ == "__main__":
  17. #client = Client(url, root=None, proxy=None, timeout=None, session=None)
  18. base_path = "/user/hadoop"
  19. now = datetime.now()
  20. yestoday = now - timedelta(days=1) # 一天以前
  21. start = datetime(yestoday.year, yestoday.month, yestoday.day)
  22. end = datetime(now.year, now.month, now.day)
  23. print(start)
  24. print(end)
  25. client = Client("http://zhenqin-mbp:50070")
  26. files = list(client, base_path)
  27. for f in files:
  28. file_path = base_path + "/" + f
  29. status = client.status(file_path)
  30. lastmodify = datetime.fromtimestamp(status["modificationTime"]/1000)
  31. if lastmodify < end and status["type"] == 'FILE':
  32. # delete file
  33. print("delete file: " + file_path)
  34. client.delete(file_path)