вперед заполнить pyspark
from pyspark.sql.functions import coalesce, last, first
df.withColumn('latitude_new', coalesce(last('latitude',True).over(w1), first('latitude',True).over(w2))) \
.select('name','timestamplast', 'latitude','latitude_new') \
.show()
+----+-------------------+---------+------------+
|name| timestamplast| latitude|latitude_new|
+----+-------------------+---------+------------+
| 1|2019-08-01 00:00:00| null| 51.819645|
| 1|2019-08-01 00:00:01| null| 51.819645|
| 1|2019-08-01 00:00:02|51.819645| 51.819645|
| 1|2019-08-01 00:00:03| 51.81964| 51.81964|
| 1|2019-08-01 00:00:04| null| 51.81964|
| 1|2019-08-01 00:00:05| null| 51.81964|
| 1|2019-08-01 00:00:06| null| 51.81964|
| 1|2019-08-01 00:00:07| 51.82385| 51.82385|
+----+-------------------+---------+------------+
Courageous Caterpillar