#!/usr/bin/env python
# encoding: utf-8
"""
Operating HDFS from Python with the hdfs (WebHDFS) client:
delete files under a directory that were last modified before today.
"""
from datetime import datetime
from datetime import timedelta
from hdfs.client import Client


# Return the entries under an HDFS directory (names only).
def list_files(client, hdfs_path):
    return client.list(hdfs_path, status=False)


if __name__ == "__main__":
    # client = Client(url, root=None, proxy=None, timeout=None, session=None)
    base_path = "/user/hadoop"
    now = datetime.now()
    yesterday = now - timedelta(days=1)  # one day ago
    start = datetime(yesterday.year, yesterday.month, yesterday.day)
    end = datetime(now.year, now.month, now.day)
    print(start)
    print(end)

    client = Client("http://zhenqin-mbp:50070")
    files = list_files(client, base_path)
    for f in files:
        file_path = base_path + "/" + f
        status = client.status(file_path)
        # WebHDFS reports modificationTime in milliseconds since the epoch.
        last_modified = datetime.fromtimestamp(status["modificationTime"] / 1000)
        if last_modified < end and status["type"] == "FILE":
            # Delete files last modified before today's midnight.
            print("delete file: " + file_path)
            client.delete(file_path)
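
The script above builds the Client directly from the NameNode's WebHDFS URL. The hdfs package can also load connection details from an ~/.hdfscli.cfg file via hdfs.Config. The sketch below is a minimal illustration of that approach, assuming a config file that defines an alias named "dev" pointing at the same NameNode; the alias name and paths are examples, not part of the original script.

#!/usr/bin/env python
# encoding: utf-8
"""
Minimal sketch: load a WebHDFS client from ~/.hdfscli.cfg instead of a hard-coded URL.
Assumes the config file defines a "dev" alias, e.g.:

[global]
default.alias = dev

[dev.alias]
url = http://zhenqin-mbp:50070
"""
from hdfs import Config

client = Config().get_client("dev")

# With status=True, list() yields (name, status) pairs, so type and
# modification time are available without a separate status() call per entry.
for name, status in client.list("/user/hadoop", status=True):
    print(name, status["type"], status["modificationTime"])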