apache · amoghrajesh · Jun 12, 2026
diff --git a/providers/apache/spark/docs/operators.rst b/providers/apache/spark/docs/operators.rst
@@ -190,7 +190,7 @@ independently on the cluster. If the Airflow worker dies while the Spark job is
-Airflow loses track of it and the behaviour to submit a brand new job would be wasting
+Airflow loses track of it, and submitting a brand new job would waste
 the compute already done or even cause conflicts if the Spark job itself is not designed to be idempotent.
 
-Now, the ``SparkSubmitOperator`` solves this by persisting the driver ID to ``task_state`` immediately after
+Now, the ``SparkSubmitOperator`` solves this by persisting the driver ID to ``task_store`` immediately after
 submission. On retry, it reads the ID back and reconnects to the already-running driver instead of
 resubmitting.
 
@@ -212,7 +212,7 @@ The reconnection polling calls the Spark standalone REST API
 See :doc:`connections/spark-submit` for how to configure these fields.
 
 .. note::
-    Crash recovery in cluster mode requires Airflow 3.3+ (``task_state`` support). On earlier
+    Crash recovery in cluster mode requires Airflow 3.3+ (``task_store`` support). On earlier
     versions the operator falls back to the previous behavior of always submitting fresh.
 
 Tracking driver status via Kubernetes API

diff --git a/providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py b/providers/apache/spark/src/airflow/providers/apache/spark/operators/spark_submit.py
@@ -37,7 +37,7 @@
     # ResumableJobMixin does not exist in Airflow 2, so we need to add a stub to make it
     # behave as before
     class ResumableJobMixin:  # type: ignore[no-redef]
-        """Airflow 2 stub — no task_state, always submits fresh."""
+        """Airflow 2 stub — no task_store, always submits fresh."""
 
         external_id_key: str = "remote_job_id"
 
@@ -264,7 +264,7 @@ def execute(self, context: Context) -> None:
         if hook._should_track_driver_status:
             if self.reconnect_on_retry:
                 return self.execute_resumable(context)
-            # reconnect_on_retry=False: still submit-and-poll, just skip task_state persistence.
+            # reconnect_on_retry=False: still submit-and-poll, just skip task_store persistence.
             driver_id = self.submit_job(context)
             self.poll_until_complete(driver_id, context)
             return self.get_job_result(driver_id, context)
@@ -284,7 +284,7 @@ def execute(self, context: Context) -> None:
                 hook._validate_yarn_track_via_rm_api_config()
                 if self.reconnect_on_retry:
                     return self.execute_resumable(context)
-                # reconnect_on_retry=False: still submit-and-poll, just skip task_state persistence.
+                # reconnect_on_retry=False: still submit-and-poll, just skip task_store persistence.
                 driver_id = self.submit_job(context)
                 self.poll_until_complete(driver_id, context)
                 return self.get_job_result(driver_id, context)

diff --git a/providers/apache/spark/tests/unit/apache/spark/operators/test_spark_submit.py b/providers/apache/spark/tests/unit/apache/spark/operators/test_spark_submit.py
@@ -480,7 +480,7 @@ def test_inject_openlineage_simple_config_wrong_transport_to_spark(
         }
 
 
-class FakeTaskState:
+class FakeTaskStore:
-    """In-memory task state for tests."""
+    """In-memory task store for tests."""
 
     def __init__(self, stored: dict[str, str] | None = None):
@@ -528,7 +528,7 @@ def test_cluster_mode_first_run_persists_id_before_polling(self):
         operator._hook = self._make_hook(should_track=True)
         operator._hook.submit.return_value = "driver-001"
 
-        task_store = FakeTaskState()
+        task_store = FakeTaskStore()
         persisted_before_poll = []
 
         def track_poll(external_id, context):
@@ -555,7 +555,7 @@ def test_retry_behaviour_based_on_prior_driver_status(self, prior_status, expect
         operator = self._make_operator()
         operator._hook = self._make_hook(should_track=True)
         operator._hook.submit.return_value = "driver-new"
-        task_store = FakeTaskState({"spark_job_id": "driver-001"})
+        task_store = FakeTaskStore({"spark_job_id": "driver-001"})
 
         operator.get_job_status = lambda external_id, context: prior_status
         polled = []
@@ -590,7 +590,7 @@ def test_reconnect_on_retry_false_submits_fresh_and_polls(self):
         operator = self._make_operator(reconnect_on_retry=False)
         operator._hook = self._make_hook(should_track=True)
         operator._hook.submit.return_value = "driver-new"
-        task_store = FakeTaskState({"spark_job_id": "driver-old"})
+        task_store = FakeTaskStore({"spark_job_id": "driver-old"})
         polled = []
         operator.poll_until_complete = lambda external_id, context: polled.append(external_id)
 
@@ -733,7 +733,7 @@ def test_yarn_first_run_persists_app_id_before_polling(self):
         operator._hook._yarn_application_id = "application_1234_0001"
         operator._hook.submit.return_value = None
 
-        task_store = FakeTaskState()
+        task_store = FakeTaskStore()
         persisted_before_poll = []
 
         def track_poll(external_id, context):
@@ -747,7 +747,7 @@ def track_poll(external_id, context):
     def test_yarn_retry_reconnects_to_running_app(self):
         operator = self._make_operator()
         operator._hook = self._make_hook(is_yarn_cluster=True)
-        task_store = FakeTaskState({"spark_job_id": "application_1234_0001"})
+        task_store = FakeTaskStore({"spark_job_id": "application_1234_0001"})
 
         operator.get_job_status = lambda external_id, context: "RUNNING"
         polled = []
@@ -761,7 +761,7 @@ def test_yarn_retry_reconnects_to_running_app(self):
     def test_yarn_retry_skips_already_succeeded_app(self):
         operator = self._make_operator()
         operator._hook = self._make_hook(is_yarn_cluster=True)
-        task_store = FakeTaskState({"spark_job_id": "application_1234_0001"})
+        task_store = FakeTaskStore({"spark_job_id": "application_1234_0001"})
 
         operator.get_job_status = lambda external_id, context: "SUCCEEDED"
 
@@ -775,7 +775,7 @@ def test_yarn_retry_resubmits_after_failed_app(self):
         operator._hook._conf = {}
         operator._hook._yarn_application_id = "application_1234_0002"
         operator._hook.submit.return_value = None
-        task_store = FakeTaskState({"spark_job_id": "application_1234_0001"})
+        task_store = FakeTaskStore({"spark_job_id": "application_1234_0001"})
 
         operator.get_job_status = lambda external_id, context: "FAILED"
         polled = []