from pyspark.sql.types import *schema = StructType([StructField("name", StringType(), True),StructField("age", IntegerType(), True)])rdd = sc.parallelize([('Alice', 1)])spark_session.createDataFrame(rdd, schema)
或者使用 python 定义 udf from pyspark.sql.types import IntegerType from pyspark.sql.types import ArrayType def add_one_to_els(elements): return [el + 1 for el in elements] spark.udf.register("plusOneInt", add_one_to_els, ArrayType(IntegerType())) 1. 2. 3. 4. 5. 6. 在sql 中...