본문 바로가기

spark

[pyspark] Create a Spark DataFrame

from pyspark.sql.types import *

 

# Column layout for the DataFrame: each StructField is (name, Spark type, nullable).
schema = StructType(
    [
        StructField("col1", StringType(), True),
        StructField("col2", IntegerType(), True),
        StructField("col3", DoubleType(), True),
        StructField("col4", FloatType(), True),
    ]
)

# Each row is a plain tuple whose positions line up with the schema fields above.
rowdata = [("col1_value", 222, 1.23, 3.44555)]

...  # placeholder kept from the original snippet (presumably further rows were elided)

# NOTE(review): `spark` is not defined in this snippet; presumably it is an
# existing SparkSession (e.g. the one a pyspark shell or notebook provides) —
# confirm before running this as a standalone script.
createDf = spark.createDataFrame(rowdata, schema)