Changes from all commits
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
@@ -12,9 +12,21 @@ repos:
       # Run the linter.
       - id: ruff
         args: [ --fix ]
+        exclude: |
+          (?x)^(
+              examples/dask/hello_world/business_logic\.py|
+              examples/ray/hello_world/business_logic\.py|
+              examples/spark/pandas_on_spark/business_logic\.py
+          )$

Contributor (inline, on the added exclude block): why updates to these?

       # Run the formatter.
       - id: ruff-format
         # args: [ --diff ]  # Use for previewing changes
+        exclude: |
+          (?x)^(
+              examples/dask/hello_world/business_logic\.py|
+              examples/ray/hello_world/business_logic\.py|
+              examples/spark/pandas_on_spark/business_logic\.py
+          )$
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.6.0
     hooks:
@@ -25,6 +37,12 @@ repos:
       - id: requirements-txt-fixer
       # valid python file
       - id: check-ast
+        exclude: |
+          (?x)^(
+              examples/dask/hello_world/business_logic\.py|
+              examples/ray/hello_world/business_logic\.py|
+              examples/spark/pandas_on_spark/business_logic\.py
+          )$
   - repo: local
     hooks:
       - id: validate-example-notebooks
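For context on the new `exclude` blocks: pre-commit matches the pattern against each file's path relative to the repository root, and the leading `(?x)` switches the regex into Python's verbose mode so the alternation can be laid out one path per line. A standalone sanity check of the same pattern (a sketch for illustration; `run.py` is a hypothetical path):

```python
import re

# The same verbose pattern as in .pre-commit-config.yaml; (?x) makes the
# regex engine ignore the whitespace used purely for layout.
EXCLUDE = re.compile(
    r"""(?x)^(
        examples/dask/hello_world/business_logic\.py|
        examples/ray/hello_world/business_logic\.py|
        examples/spark/pandas_on_spark/business_logic\.py
    )$"""
)

assert EXCLUDE.match("examples/ray/hello_world/business_logic.py")
assert not EXCLUDE.match("examples/ray/hello_world/run.py")  # hypothetical path
```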
10 changes: 6 additions & 4 deletions contrib/docs/compile_docs.py
@@ -343,10 +343,12 @@ def _create_commit_file(df_path, single_df):
     commit_path = df_path.replace("docs", "static/commits")
     os.makedirs(commit_path, exist_ok=True)
     with open(os.path.join(commit_path, "commit.txt"), "w") as f:
-        for commit, ts in zip(
-            single_df["__init__.py"]["commit"], single_df["__init__.py"]["timestamp"]
-        ):
-            f.write(f"[commit::{commit}][ts::{ts}]\n")
+        f.writelines(
+            f"[commit::{commit}][ts::{ts}]\n"
+            for commit, ts in zip(
+                single_df["__init__.py"]["commit"], single_df["__init__.py"]["timestamp"]
+            )
+        )

Contributor (on lines +346 to +351): this doesn't seem like a related change?

@config.when(is_dagworks="True")
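On the `_create_commit_file` rewrite itself: `writelines` accepts any iterable of strings, including a generator expression, so the new form streams the same lines to the file without first building a list; the output is byte-for-byte identical to the old loop. A minimal equivalence check with made-up data:

```python
import io

commits = ["abc123", "def456"]  # hypothetical commit hashes
timestamps = ["1700000000", "1700000100"]  # hypothetical timestamps

buf = io.StringIO()
# writelines() adds no newlines of its own; each generated item carries "\n".
buf.writelines(f"[commit::{c}][ts::{t}]\n" for c, t in zip(commits, timestamps))

assert buf.getvalue() == "[commit::abc123][ts::1700000000]\n[commit::def456][ts::1700000100]\n"
```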
64 changes: 32 additions & 32 deletions contrib/hamilton/contrib/dagworks/sphinx_doc_chunking/test.ipynb
@@ -2,21 +2,26 @@
"cells": [
{
"cell_type": "markdown",
"id": "c174ce5a23eed9a1",
"metadata": {
"collapsed": false
},
"source": [
"## A basic notebook to run the pipeline defined in `doc_pipeline.py`.\n",
"\n",
"By default this runs parts of the pipeline in parallel using threads or processes.\n",
"\n",
"To scale processing here look at all the subsequent cells that show how to run on \n",
" ray or dask. For spark see spark/notebook.ipynb."
],
"metadata": {
"collapsed": false
},
"id": "c174ce5a23eed9a1"
]
},
    {
     "cell_type": "code",
+    "execution_count": 0,
+    "id": "initial_id",
+    "metadata": {
+     "collapsed": true
+    },
     "outputs": [],
     "source": [
      "import doc_pipeline\n",
Expand Down Expand Up @@ -46,25 +51,25 @@
"for chunk in result[\"collect_chunked_url_text\"]:\n",
" pprint.pprint(chunk)\n",
"dag"
],
"metadata": {
"collapsed": true
},
"id": "initial_id",
"execution_count": 0
]
},
{
"cell_type": "markdown",
"source": [
"# Ray"
],
"id": "7bc40e6914aed330",
"metadata": {
"collapsed": false
},
"id": "7bc40e6914aed330"
"source": [
"# Ray"
]
},
    {
     "cell_type": "code",
+    "execution_count": null,
+    "id": "a4df6e50283f68ab",
+    "metadata": {
+     "collapsed": false
+    },
     "outputs": [],
     "source": [
      "import logging\n",
@@ -86,9 +91,7 @@
" # Choose a backend to process the parallel parts of the pipeline\n",
" # .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=5))\n",
" # .with_remote_executor(executors.MultiProcessingExecutor(max_tasks=5))\n",
" .with_remote_executor(\n",
" h_ray.RayTaskExecutor()\n",
" ) # be sure to run ray.init() or pass in config.\n",
" .with_remote_executor(h_ray.RayTaskExecutor()) # be sure to run ray.init() or pass in config.\n",
" .build()\n",
")\n",
"dag = dr.display_all_functions()\n",
@@ -104,24 +107,25 @@
"\n",
"ray.shutdown()\n",
"dag"
],
"metadata": {
"collapsed": false
},
"id": "a4df6e50283f68ab"
]
},
{
"cell_type": "markdown",
"source": [
"# Dask"
],
"id": "46aa4763a337dcb1",
"metadata": {
"collapsed": false
},
"id": "46aa4763a337dcb1"
"source": [
"# Dask"
]
},
    {
     "cell_type": "code",
+    "execution_count": null,
+    "id": "103824eec22810fe",
+    "metadata": {
+     "collapsed": false
+    },
     "outputs": [],
     "source": [
      "import logging\n",
@@ -162,11 +166,7 @@
"\n",
"client.shutdown()\n",
"dag"
],
"metadata": {
"collapsed": false
},
"id": "103824eec22810fe"
]
}
],
"metadata": {
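The churn in this notebook is presentation-only: cell keys move into alphabetical order (`id` and `metadata` ahead of `source`), and a multi-line `.with_remote_executor(...)` call is joined onto one line. The key order is consistent with round-tripping the file through `nbformat`, which serializes notebook JSON with sorted keys, while the code reflow matches what `ruff-format` does to cell sources. A sketch of such a round-trip (the tool attribution is an assumption, not stated in the PR):

```python
import nbformat

# Read and immediately rewrite the notebook. nbformat emits JSON with
# sorted keys, which normalizes cell-field order without changing content.
nb = nbformat.read("test.ipynb", as_version=4)  # path assumed for illustration
nbformat.write(nb, "test.ipynb")
```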
@@ -23,7 +23,6 @@
     ],
     "source": [
      "import os\n",
-     "import hamilton\n",
      "\n",
      "os.getcwd()"
     ]
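The dropped `import hamilton` here (and in the next notebook) is the kind of edit `ruff --fix` makes on its own: the module is imported but never referenced, which rule F401 (unused-import) flags and autofixes. Schematically, the cell before the fix looked like this:

```python
# Before the autofix: `hamilton` is imported but never used in the cell,
# so ruff's F401 fix deletes that line and leaves the rest untouched.
import os
import hamilton  # F401: imported but unused -- removed by ruff --fix

os.getcwd()
```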
@@ -23,7 +23,6 @@
     ],
     "source": [
      "import os\n",
-     "import hamilton\n",
      "\n",
      "os.getcwd()"
     ]
@@ -66,7 +65,6 @@
     }
    ],
    "source": [
-    "\n",
     "print(translate_to_hamilton.user_prompt(\"a = b + c\"))"
    ]
   },
@@ -46,9 +46,7 @@ def a(b: float, c: float) -> float:
This Hamilton setup assumes that `b` and `c` are provided to the framework as inputs. If `b` and `c` were to be computed by other functions within the Hamilton framework or came from some form of data loading functions, those functions would need to be defined in `functions.py` as well, with appropriate signatures.
'''
     expected = [
-        "def a(b: float, c: float) -> float:\n"
-        '    """Adds b and c to get a."""\n'
-        "    return b + c\n",
+        'def a(b: float, c: float) -> float:\n    """Adds b and c to get a."""\n    return b + c\n',
         "from hamilton import driver\n"
         "import functions\n"
         "\n"
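The `expected` change is cosmetic: Python concatenates adjacent string literals at parse time, so the three stacked fragments and the single collapsed literal denote the same string. For instance:

```python
stacked = (
    "def a(b: float, c: float) -> float:\n"
    '    """Adds b and c to get a."""\n'
    "    return b + c\n"
)
collapsed = 'def a(b: float, c: float) -> float:\n    """Adds b and c to get a."""\n    return b + c\n'
assert stacked == collapsed  # adjacent literals are joined by the parser
```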
@@ -105,8 +105,7 @@ def snli_dataset(download_path: str = "data") -> pd.DataFrame:
     file_name = "snli_1.0.zip"
     # Write the downloaded file into "snli_1.0.zip"
     with open(file_name, "wb") as fd:
-        for chunk in response.iter_content(chunk_size=1024):
-            fd.write(chunk)
+        fd.writelines(response.iter_content(chunk_size=1024))
     # Create a ZipFile Object
     with zipfile.ZipFile(file_name) as zip_file:
         # Extract all the contents of zip file in current directory
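Same `writelines` idiom as in `compile_docs.py`, but with bytes: despite the name, `writelines` is not line-aware; it simply writes each item of the iterable in turn, so it can consume the byte chunks from `response.iter_content(...)` on a binary file handle directly. A small self-contained check:

```python
import io

chunks = [b"PK\x03\x04", b"...zip payload..."]  # stand-in for response.iter_content(chunk_size=1024)
buf = io.BytesIO()
buf.writelines(chunks)  # writes the chunks back to back; no separators added

assert buf.getvalue() == b"PK\x03\x04...zip payload..."
```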