# ----------------------------------------------------------------------
# Find count of NaN / null values for a selected subset of columns.
from pyspark.sql.functions import col, isnan, when, count

# Columns to inspect (PEP 8 snake_case; was `df_Columns`).
df_columns = ["name", "state", "number"]

# One aggregate per column: count rows where the value is NaN or null,
# labelled with the column's own name.
# NOTE(review): isnan() on string columns relies on Spark's implicit
# string->double cast; works in default mode, may fail under ANSI — confirm.
df.select(
    [count(when(isnan(c) | col(c).isNull(), c)).alias(c) for c in df_columns]
).show()
# ----------------------------------------------------------------------
# Find count of null, None, and NaN values for every DataFrame column.
from pyspark.sql.functions import col, isnan, when, count

# Build one aggregate expression per column: count the rows where the
# value is NaN or null, aliased to the column's own name.
null_count_exprs = [
    count(when(isnan(name) | col(name).isNull(), name)).alias(name)
    for name in df.columns
]
df.select(null_count_exprs).show()
# Example output:
# +----+-----+------+
# |name|state|number|
# +----+-----+------+
# |   0|    1|     3|
# +----+-----+------+
# ----------------------------------------------------------------------
# Find count of empty strings, the string literals 'None'/'NULL',
# plus real nulls and NaN, for every column.
from pyspark.sql.functions import col, isnan, when, count


def _is_missing(name):
    # True when the value is null/NaN, an empty string, or contains the
    # text 'None' or 'NULL'.
    # NOTE(review): contains() is substring matching, so e.g. 'NoneSuch'
    # would also be counted — confirm that is the intent.
    c = col(name)
    return (
        c.contains('None')
        | c.contains('NULL')
        | (c == '')
        | c.isNull()
        | isnan(name)
    )


df2 = df.select(
    [count(when(_is_missing(name), name)).alias(name) for name in df.columns]
)
df2.show()
# Example output:
# +----+-----+------+
# |name|state|number|
# +----+-----+------+
# |   0|    3|     3|
# +----+-----+------+