# hdfs
# [python] get hdfs path info
# 2jelly
# 2021. 12. 27. 16:07
import requests
import pandas as pd
def getPathInfo(hdfsPath, *, httpFsUrl="http://hostip:14000", userName="hive", timeout=None):
    """List an HDFS path through the HttpFS/WebHDFS REST API.

    Sends a ``LISTSTATUS`` request for *hdfsPath* and returns the
    ``FileStatuses.FileStatus`` records of the JSON response as a DataFrame,
    excluding entries whose ``pathSuffix`` begins with an underscore.

    Args:
        hdfsPath: HDFS path to list; it is appended verbatim after
            ``/webhdfs/v1`` in the request URL, so it should start with "/".
        httpFsUrl: Base URL (scheme, host and port) of the HttpFS endpoint.
        userName: Value sent as the ``user.name`` query parameter.
        timeout: Optional timeout in seconds handed to ``requests``. The
            default ``None`` keeps the previous behaviour of waiting
            indefinitely.

    Returns:
        pandas.DataFrame: one row per listed entry. An empty DataFrame is
        returned when the request fails or the response does not have the
        expected shape; the error is printed rather than raised.
    """
    operation = "LISTSTATUS"
    req = "{0}/webhdfs/v1{1}?user.name={2}&op={3}".format(
        httpFsUrl, hdfsPath, userName, operation
    )

    try:
        # The context manager closes the session even when the request fails.
        with requests.Session() as s:
            response = s.get(req, headers={'Connection': 'close'}, timeout=timeout)
            # Surface HTTP errors (missing path, permission denied, ...) with a
            # readable message instead of a KeyError on the error payload.
            response.raise_for_status()
            result = response.json()["FileStatuses"]["FileStatus"]
    except (requests.RequestException, ValueError, KeyError, TypeError) as ex:
        # Best-effort contract: report the problem and hand back an empty
        # listing so callers always receive a DataFrame.
        print(ex)
        return pd.DataFrame()

    dfTmp = pd.json_normalize(result)
    if "pathSuffix" not in dfTmp.columns:
        # An empty listing normalizes to a frame without columns; there is
        # nothing to filter.
        return dfTmp

    # na=True treats a missing name like an underscore-prefixed one, so such
    # rows are dropped exactly as the former `== False` comparison did.
    startsWithUnderscore = dfTmp["pathSuffix"].str.startswith("_", na=True)
    return dfTmp[~startsWithUnderscore]
# Example call: fetch the listing for `path` and keep the result in `listPath`.
# NOTE(review): `path` is not defined in the visible code — assign the HDFS
# path to list before this line runs.
listPath = getPathInfo(path)