본문 바로가기

hdfs

[python] get hdfs path info

import requests
import pandas as pd

def getPathInfo(hdfsPath):
    """Return the WebHDFS ``LISTSTATUS`` listing of *hdfsPath* as a DataFrame.

    The HttpFS endpoint is queried as user ``hive`` and the ``FileStatus``
    entries of the response are flattened with ``pandas.json_normalize``.
    Entries whose ``pathSuffix`` starts with ``"_"`` are removed.

    Args:
        hdfsPath: Absolute HDFS path, beginning with ``/``; it is appended
            directly after ``/webhdfs/v1`` in the request URL.

    Returns:
        A DataFrame with one row per remaining entry. An empty DataFrame is
        returned when the request or the parsing fails (the error is printed,
        keeping the original best-effort behaviour) or when the listing
        itself is empty.
    """
    userName = "hive"  # getpass.getuser()
    operation = "LISTSTATUS"
    httpFsUrl = "http://hostip:14000"
    url = "{0}/webhdfs/v1{1}".format(httpFsUrl, hdfsPath)
    query = {"user.name": userName, "op": operation}

    # Bound before the try block so the final return is always valid,
    # even when the request fails.
    dfTmp = pd.DataFrame()
    try:
        # The context manager closes the session on both success and error.
        with requests.session() as s:
            response = s.get(
                url,
                params=query,
                headers={'Connection': 'close'},
                timeout=30,  # do not hang forever on an unreachable gateway
            )
            # Surface HTTP errors (404, 403, ...) instead of a confusing
            # KeyError on the missing "FileStatuses" key.
            response.raise_for_status()
            result = response.json()["FileStatuses"]["FileStatus"]

        listing = pd.json_normalize(result)
        # An empty directory yields a frame without a "pathSuffix" column,
        # so only filter when there is something to filter.
        if not listing.empty:
            # na=True drops rows lacking a suffix, matching the outcome of
            # the original `== False` comparison.
            hidden = listing["pathSuffix"].str.startswith("_", na=True)
            listing = listing[~hidden]
        dfTmp = listing
    except Exception as ex:
        print(ex)
    return dfTmp

# Example call: `path` is not defined in this snippet and must be set by the
# caller beforehand.
listPath = getPathInfo(path)

'hdfs' 카테고리의 다른 글

[hdfs] webhdfs httpfs api (file put/get)  (0) 2021.04.16