#!/usr/bin/env python
# encoding: utf-8
"""Housekeeping script that operates on HDFS from Python.

When executed as a script it connects to the HDFS endpoint ``HDFS_URL`` and
deletes every regular file located directly under ``BASE_PATH`` whose last
modification time is earlier than midnight (local time) of the current day.
Directories are never deleted and the listing is not recursive.
"""

import posixpath
import sys
from datetime import datetime
from datetime import date
from datetime import timedelta
from json import dump, load

from hdfs import Config
from hdfs.client import Client

# URL handed to the HDFS client.
# Constructor signature, for reference:
#   Client(url, root=None, proxy=None, timeout=None, session=None)
HDFS_URL = "http://zhenqin-mbp:50070"

# Directory whose direct children are candidates for deletion.
BASE_PATH = "/user/hadoop"


def list(client, hdfs_path):
    """Return the names of the entries directly under ``hdfs_path``.

    NOTE: this helper shadows the builtin ``list`` inside this module; the
    name is kept so existing callers keep working.

    Args:
        client: HDFS client object exposing ``list(path, status=...)``.
        hdfs_path: Remote directory to list.

    Returns:
        Whatever ``client.list(hdfs_path, status=False)`` returns (the entry
        names, without their status records).
    """
    return client.list(hdfs_path, status=False)


def delete_files_before(client, base_path, cutoff):
    """Delete regular files under ``base_path`` last modified before ``cutoff``.

    Args:
        client: HDFS client object exposing ``list`` and ``delete``.
        base_path: Remote directory whose direct children are inspected.
        cutoff: Naive local ``datetime``; files strictly older are deleted.

    Returns:
        The full remote paths that were deleted, in listing order.
    """
    deleted = []
    # status=True yields (name, status) pairs, so a single listing request
    # replaces one extra status request per entry.
    for name, status in client.list(base_path, status=True):
        if status["type"] != 'FILE':
            continue
        # modificationTime is divided by 1000 to obtain seconds for
        # fromtimestamp(); a float divisor avoids integer truncation on
        # Python 2.
        last_modified = datetime.fromtimestamp(
            status["modificationTime"] / 1000.0)
        if last_modified < cutoff:
            file_path = posixpath.join(base_path, name)
            print("delete file: " + file_path)
            client.delete(file_path)
            deleted.append(file_path)
    return deleted


def main():
    """Delete files under ``BASE_PATH`` that were last modified before today."""
    now = datetime.now()
    # Midnight at the start of today: everything older than this is removed.
    end = now.replace(hour=0, minute=0, second=0, microsecond=0)
    # Midnight at the start of yesterday; only reported, not used as a bound.
    start = end - timedelta(days=1)
    print(start)
    print(end)

    client = Client(HDFS_URL)
    delete_files_before(client, BASE_PATH, end)


if __name__ == "__main__":
    main()