```python
import pandas as pd

# Create Dict object
courses = {'Courses': ['Spark', 'PySpark', 'Java', 'pandas'],
           'Fee': [20000, 20000, 15000, 20000],
           'Duration': ['35days', '35days', '40days', '30days']}

# Create DataFrame from dict
df = pd.DataFrame.from_dict(courses)
print(df)
```

Yields below output.

```python
# Output:
#    Courses    Fee Duration
# 0    Spark  20000   35days
# 1  PySpark  20000   35days
# 2     Java  15000   40days
# 3   pandas  20000   30days
```
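from_dict() also takes an orient argument. As a quick sketch (with made-up row labels), orient='index' treats each dictionary key as a row label rather than a column name, and columns= supplies the column names:

```python
import pandas as pd

# Hypothetical data keyed by row label; orient='index' turns each key
# into an index entry and each list into that row's values.
data_by_row = {
    'row1': ['Spark', 20000, '35days'],
    'row2': ['PySpark', 20000, '35days'],
}
df_rows = pd.DataFrame.from_dict(data_by_row, orient='index',
                                 columns=['Courses', 'Fee', 'Duration'])
print(df_rows)
```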
Convert StructType to MapType (map) Column

create_map() is a PySpark SQL function used to convert a StructType column into a MapType column.

```python
# Convert struct type to Map
from pyspark.sql.functions import col, lit, create_map

# The struct column "properties" and its "salary" field are assumed from context;
# each lit() key is paired with the matching struct field.
df = df.withColumn("propertiesMap", create_map(
    lit("salary"), col("properties.salary")
))
```
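For context, here is a minimal, self-contained sketch of the same idea with made-up column names and data: it builds a DataFrame, nests two columns into a struct, then converts that struct into a map with create_map().

```python
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, lit, create_map, struct

spark = SparkSession.builder.appName("struct-to-map").getOrCreate()

# Hypothetical flat input that we first nest into a struct column
data = [("James", 3000, "NY"), ("Anna", 4000, "CA")]
df = spark.createDataFrame(data, ["name", "salary", "location"])

# Nest salary and location under a struct column called "properties"
df = df.withColumn("properties", struct(col("salary"), col("location")))

# Convert the struct to a MapType column; salary is cast to string so the
# map has a single value type
df = df.withColumn("propertiesMap", create_map(
    lit("salary"), col("properties.salary").cast("string"),
    lit("location"), col("properties.location")
)).drop("properties")

df.printSchema()
df.show(truncate=False)
```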
```python
tolist()

# all the other columns used as properties
properties = pdf.drop(['lat', 'lng'], axis=1).to_dict('records')

# whole geojson object
feature_collection = FeatureCollection(features=features, properties=properties)

gdf = geopandas.GeoDataFrame(...
```
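Since the snippet above is cut off, here is a minimal, hedged sketch of the same idea using geopandas directly: points_from_xy() builds point geometries from lat/lng columns, and the remaining columns stay as ordinary attributes. The pdf column names and values are assumptions for illustration.

```python
import pandas as pd
import geopandas

# Hypothetical source DataFrame with lat/lng plus attribute columns
pdf = pd.DataFrame({
    "lat": [40.71, 34.05],
    "lng": [-74.00, -118.24],
    "city": ["New York", "Los Angeles"],
})

# Build point geometries from the coordinate columns (x=lng, y=lat)
gdf = geopandas.GeoDataFrame(
    pdf.drop(columns=["lat", "lng"]),
    geometry=geopandas.points_from_xy(pdf["lng"], pdf["lat"]),
    crs="EPSG:4326",
)
print(gdf)
```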
```python
df = df.astype({"Fee": "int", "Discount": "int"})
print(df.dtypes)

# Converting multiple columns to integer data type
# using a dictionary
dtypes_dict = {"Fee": int, "Discount": int}
df = df.astype(dtypes_dict)
print(df.dtypes)
```

Yields below output.

```python
# Output:
# Courses     object
# Fee          int64
# Duration    object
# Discount     int64
# dtype: object
```
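If a column holds non-numeric strings or missing values, astype() raises an error. A common alternative, shown here as a small sketch with assumed column values, is pd.to_numeric() with errors='coerce', which turns unparseable entries into NaN instead of failing:

```python
import pandas as pd

df = pd.DataFrame({"Fee": ["20000", "15000", "n/a"]})

# Coerce unparseable strings to NaN rather than raising an error
df["Fee"] = pd.to_numeric(df["Fee"], errors="coerce")
print(df.dtypes)   # Fee becomes float64 because NaN is present
```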
```python
#  ['PySpark' 25000 '40days' 2300]
#  ['Python' 22000 '35days' 1200]
#  ['pandas' 30000 '50days' 2000]]
```

Convert Pandas Column to Array Using values

In this section, we'll convert a pandas DataFrame column into a NumPy array using df['col_name'].values. Note that values is an attribute rather than a method, so it is accessed without parentheses; it returns a NumPy representation of the column's data.
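As a quick sketch (column names assumed from the earlier examples), both the values attribute and the newer to_numpy() method produce a NumPy array from a column:

```python
import pandas as pd
import numpy as np

df = pd.DataFrame({"Courses": ["Spark", "PySpark"], "Fee": [20000, 25000]})

arr1 = df["Fee"].values       # ndarray via the attribute
arr2 = df["Fee"].to_numpy()   # recommended API, equivalent result here

print(type(arr1), arr1)             # <class 'numpy.ndarray'> [20000 25000]
print(np.array_equal(arr1, arr2))   # True
```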
Convert Multiple Columns to String

You can also convert multiple columns to strings by passing a dict of column name -> data type to the astype() method. The below example converts the column Fee from int to string and Discount from float to string dtype.

```python
# Multiple columns string conversion
df = pd.DataFrame(technologies)
df = df.astype({"Fee": "str", "Discount": "str"})
print(df.dtypes)
```
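Note that astype(str) produces columns with the generic object dtype. If you want pandas' dedicated string dtype instead, you can pass "string"; a small sketch with assumed column values:

```python
import pandas as pd

df = pd.DataFrame({"Fee": [20000, 25000], "Discount": [1000.0, 2300.0]})

# Convert both columns to pandas' StringDtype instead of plain object
df2 = df.astype({"Fee": "string", "Discount": "string"})
print(df2.dtypes)   # dtypes now show the string dtype rather than object
```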
```python
print(df.dtypes)

# Output:
# Courses     object
# Fee          int64
# Duration    object
# Discount    object
# dtype: object
```

Complete Example of Convert String to Integer

```python
import pandas as pd
import numpy as np

# Sample data; the values below are assumed for illustration
technologies = ({
    'Courses': ["Spark", "PySpark", "Hadoop", "Pandas"],
    'Fee': ["22000", "25000", "23000", "24000"],
    'Duration': ['30days', '40days', '35days', '50days'],
    'Discount': ["1000", "2300", "1000", "1200"]
})
df = pd.DataFrame(technologies)
print(df.dtypes)

# Convert the "Fee" column from string to integer
df = df.astype({"Fee": "int"})
print(df.dtypes)
```
Similarly, you can also convert multiple columns from float to integer by passing a dict of column name -> data type to the astype() method. The below example converts both columns Fee and Discount to int types.

```python
# Converting "Fee" and "Discount" from float to int
df = df.astype({"Fee": "int", "Discount": "int"})
print(df.dtypes)
```
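One caveat worth noting: astype('int') fails if a float column contains NaN. A hedged workaround (column values assumed) is pandas' nullable Int64 dtype, which keeps missing values as <NA>:

```python
import pandas as pd
import numpy as np

df = pd.DataFrame({"Fee": [20000.0, np.nan], "Discount": [1000.0, 2300.0]})

# "Int64" (capital I) is the nullable integer dtype, so the NaN survives as <NA>
df = df.astype({"Fee": "Int64", "Discount": "int"})
print(df.dtypes)
```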
```python
df2 = df.to_json(orient='index')
print("After converting DataFrame to JSON string:\n", df2)
```

Yields below output.

```python
# Output:
# After converting DataFrame to JSON string:
# {"0":{"Courses":"Spark","Fee":22000,"Duration":"30days","Discount":1000.0},"1":{"Courses":"PySpark","Fee":...
```
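to_json() supports several orient layouts. The sketch below, reusing a small assumed DataFrame, contrasts 'records' and 'columns' with the 'index' layout shown above:

```python
import pandas as pd

df = pd.DataFrame({"Courses": ["Spark", "PySpark"], "Fee": [22000, 25000]})

print(df.to_json(orient="index"))    # {"0":{"Courses":"Spark","Fee":22000},"1":{...}}
print(df.to_json(orient="records"))  # [{"Courses":"Spark","Fee":22000},{...}]
print(df.to_json(orient="columns"))  # {"Courses":{"0":"Spark","1":"PySpark"},"Fee":{...}}
```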
```python
from pyspark.sql.functions import split

# Splitting the "name" column into an array of first name, middle name, and last name
df = df.withColumn("name_array", split(df["name"], r",\s*"))

# Displaying the updated DataFrame
df.show(truncate=False)
```
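Once the column is an array, individual elements can be pulled out with getItem(). A brief sketch, with the column names carried over from the example above:

```python
from pyspark.sql.functions import col

# Extract the first and last elements of the split array into their own columns
df = df.withColumn("first_name", col("name_array").getItem(0)) \
       .withColumn("last_name", col("name_array").getItem(2))
df.show(truncate=False)
```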