你可以使用 `when` 来实现这一点。
from pyspark.sql import functions as F
# Demo frame: each row's COL_NAME holds the name of one of the other columns.
data = [
    (111, 5611, "ABCD", 56.17, "ID"),
    (211, 5411, "GFED", 451.1, "AMOUNT"),
    (311, 3212, "YTRA", 687.3, "STUDY"),
]
df = spark.createDataFrame(data, ("ID", "CODE", "STUDY", "AMOUNT", "COL_NAME"))
def derive_column_value(columns=None, name_col="COL_NAME"):
    """Build a Column selecting, per row, the value of the column whose
    name is stored in *name_col* (a chained CASE WHEN expression).

    Parameters
    ----------
    columns : iterable of str, optional
        Candidate column names to match against. Defaults to ``df.columns``.
    name_col : str, optional
        Name of the column that holds the per-row target column name.

    Returns
    -------
    pyspark.sql.Column
        Rows whose *name_col* value matches none of *columns* yield NULL
        (the implicit ``otherwise`` of an unterminated WHEN chain).
    """
    if columns is None:
        columns = df.columns
    condition = None
    for candidate in columns:
        matches = F.col(name_col) == candidate
        if condition is None:
            # Seed the chain with the module-level F.when(...).
            condition = F.when(matches, F.col(candidate))
        else:
            # Subsequent links use Column.when on the growing expression.
            condition = condition.when(matches, F.col(candidate))
    if condition is None:
        # No candidate columns at all: the original code would have
        # returned the `functions` module itself here and crashed the
        # caller; return a NULL literal instead so withColumn still works.
        return F.lit(None)
    return condition


df.withColumn("COL_VALUE", derive_column_value()).show()
输出
+---+----+-----+------+--------+---------+
| ID|CODE|STUDY|AMOUNT|COL_NAME|COL_VALUE|
+---+----+-----+------+--------+---------+
|111|5611| ABCD| 56.17| ID| 111|
|211|5411| GFED| 451.1| AMOUNT| 451.1|
|311|3212| YTRA| 687.3| STUDY| YTRA|
+---+----+-----+------+--------+---------+