import requests
import pandas as pd
def getPathInfo(hdfsPath, httpFsUrl="http://hostip:14000", userName="hive", timeout=30):
    """List the entries under an HDFS path through the WebHDFS REST interface.

    Sends a ``LISTSTATUS`` request to ``{httpFsUrl}/webhdfs/v1{hdfsPath}`` and
    flattens the ``FileStatuses.FileStatus`` array of the JSON reply into a
    DataFrame, one row per entry. Entries whose ``pathSuffix`` starts with
    ``"_"`` are dropped (presumably marker/temporary files such as
    ``_SUCCESS`` -- confirm against the data actually stored).

    Args:
        hdfsPath: Absolute HDFS path, starting with ``/``; it is appended
            verbatim after ``/webhdfs/v1``.
        httpFsUrl: Base URL (scheme, host, port) of the HttpFS/WebHDFS
            endpoint. The default host name is a placeholder.
        userName: Value sent as the ``user.name`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A DataFrame with the remaining entries. On any failure the error is
        printed and an empty DataFrame is returned, so callers always receive
        a DataFrame.
    """
    # Bound up front so the final return is valid even when the request fails.
    dfTmp = pd.DataFrame()
    try:
        req = "{0}/webhdfs/v1{1}".format(httpFsUrl, hdfsPath)
        query = {"user.name": userName, "op": "LISTSTATUS"}
        # The context manager closes the session on both success and failure.
        with requests.Session() as s:
            response = s.get(
                req,
                params=query,
                headers={'Connection': 'close'},
                timeout=timeout,
            )
            # Report HTTP errors as such instead of as a confusing KeyError
            # raised while digging through the error payload below.
            response.raise_for_status()
            result = response.json()["FileStatuses"]["FileStatus"]
        listing = pd.json_normalize(result)
        # An empty listing normalizes to a frame without columns; only filter
        # when there is a column to filter on.
        if "pathSuffix" in listing.columns:
            hidden = listing["pathSuffix"].str.startswith("_", na=False)
            listing = listing[~hidden]
        dfTmp = listing
    except Exception as ex:
        # Deliberate best-effort boundary: report the problem, return empty.
        print(ex)
    return dfTmp
# Example call: fetch the listing for `path` and keep the resulting DataFrame.
# NOTE(review): `path` is not defined in the visible source -- it must be set
# to an HDFS path string (e.g. "/user/hive/warehouse") before this line runs.
listPath = getPathInfo(path)
'hdfs' 카테고리의 다른 글
| [hdfs] webhdfs httpfs api (file put/get) (0) | 2021.04.16 |
|---|---|