[Spark DataFrame] Extract a date value using a PySpark UDF lambda
# PySpark snippet: registers two UDFs — fnDataReplace strips backslashes from
# a string column, and fnGetBaseDate derives a "base date" by delegating to
# extractBaseDate over four candidate date/timestamp columns.
# NOTE(review): this captured line is truncated mid-statement — the strptime
# format string is cut off at `".."`, so extractBaseDate is incomplete here;
# recover the full function body from the original source before using it.
# NOTE(review): fnDataReplace's lambda calls s.replace without a None guard —
# presumably the column is guaranteed non-null upstream; verify, or a NULL
# input will raise AttributeError inside the UDF.
# NOTE(review): the `len(dateCol1) > 13` check presumably distinguishes a full
# timestamp string from a bare date — TODO confirm against the source data.
from pyspark.sql import SparkSession import pyspark.sql.functions as func import datetime fnDataReplace = func.udf(lambda s : s.replace('\\','')) fnGetBaseDate = func.udf(lambda value1, s1, s2, s3 : extractBaseDate(value1, s1, s2, s3)) def extractBaseDate(value1, dateCol1, dateCol2, timestampCol): if (dateCol1 is not None) and len(dateCol1) > 13: baseDate = datetime.datetime.strptime(dateCol1, "..
[PySpark] Create a Spark DataFrame with an explicit schema
# Build a small Spark DataFrame from an explicit four-column schema.
# Explicit imports instead of the original `import *` — same names, no
# namespace pollution.
from pyspark.sql.types import (
    StructType,
    StructField,
    StringType,
    IntegerType,
    DoubleType,
    FloatType,
)

# Schema: (column name, Spark type, nullable). No line-continuation
# backslashes needed — the bracketed list continues implicitly.
schema = StructType([
    StructField("col1", StringType(), True),
    StructField("col2", IntegerType(), True),
    StructField("col3", DoubleType(), True),
    StructField("col4", FloatType(), True),
])

# Rows must match the schema positionally: (str, int, float, float).
rowdata = []
rowdata.append(("col1_value", 222, 1.23, 3.44555))
# ... append additional row tuples here ...

# NOTE(review): assumes an active SparkSession is already bound to `spark`
# (e.g. spark-shell / notebook environment) — it is not created in this
# snippet; confirm before running standalone.
createDf = spark.createDataFrame(rowdata, schema)