diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec7978d92..91f860b91 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,9 +12,21 @@ repos: # Run the linter. - id: ruff args: [ --fix ] + exclude: | + (?x)^( + examples/dask/hello_world/business_logic\.py| + examples/ray/hello_world/business_logic\.py| + examples/spark/pandas_on_spark/business_logic\.py + )$ # Run the formatter. - id: ruff-format # args: [ --diff ] # Use for previewing changes + exclude: | + (?x)^( + examples/dask/hello_world/business_logic\.py| + examples/ray/hello_world/business_logic\.py| + examples/spark/pandas_on_spark/business_logic\.py + )$ - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 hooks: @@ -25,6 +37,12 @@ repos: - id: requirements-txt-fixer # valid python file - id: check-ast + exclude: | + (?x)^( + examples/dask/hello_world/business_logic\.py| + examples/ray/hello_world/business_logic\.py| + examples/spark/pandas_on_spark/business_logic\.py + )$ - repo: local hooks: - id: validate-example-notebooks diff --git a/contrib/docs/compile_docs.py b/contrib/docs/compile_docs.py index b5503ec61..b32616072 100644 --- a/contrib/docs/compile_docs.py +++ b/contrib/docs/compile_docs.py @@ -343,10 +343,12 @@ def _create_commit_file(df_path, single_df): commit_path = df_path.replace("docs", "static/commits") os.makedirs(commit_path, exist_ok=True) with open(os.path.join(commit_path, "commit.txt"), "w") as f: - for commit, ts in zip( - single_df["__init__.py"]["commit"], single_df["__init__.py"]["timestamp"] - ): - f.write(f"[commit::{commit}][ts::{ts}]\n") + f.writelines( + f"[commit::{commit}][ts::{ts}]\n" + for commit, ts in zip( + single_df["__init__.py"]["commit"], single_df["__init__.py"]["timestamp"] + ) + ) @config.when(is_dagworks="True") diff --git a/contrib/hamilton/contrib/dagworks/sphinx_doc_chunking/test.ipynb b/contrib/hamilton/contrib/dagworks/sphinx_doc_chunking/test.ipynb index d60262dc3..546dad5eb 100644 --- 
a/contrib/hamilton/contrib/dagworks/sphinx_doc_chunking/test.ipynb +++ b/contrib/hamilton/contrib/dagworks/sphinx_doc_chunking/test.ipynb @@ -2,6 +2,10 @@ "cells": [ { "cell_type": "markdown", + "id": "c174ce5a23eed9a1", + "metadata": { + "collapsed": false + }, "source": [ "## A basic notebook to run the pipeline defined in `doc_pipeline.py`.\n", "\n", @@ -9,14 +13,15 @@ "\n", "To scale processing here look at all the subsequent cells that show how to run on \n", " ray or dask. For spark see spark/notebook.ipynb." - ], - "metadata": { - "collapsed": false - }, - "id": "c174ce5a23eed9a1" + ] }, { "cell_type": "code", + "execution_count": 0, + "id": "initial_id", + "metadata": { + "collapsed": true + }, "outputs": [], "source": [ "import doc_pipeline\n", @@ -46,25 +51,25 @@ "for chunk in result[\"collect_chunked_url_text\"]:\n", " pprint.pprint(chunk)\n", "dag" - ], - "metadata": { - "collapsed": true - }, - "id": "initial_id", - "execution_count": 0 + ] }, { "cell_type": "markdown", - "source": [ - "# Ray" - ], + "id": "7bc40e6914aed330", "metadata": { "collapsed": false }, - "id": "7bc40e6914aed330" + "source": [ + "# Ray" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "a4df6e50283f68ab", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "import logging\n", @@ -86,9 +91,7 @@ " # Choose a backend to process the parallel parts of the pipeline\n", " # .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=5))\n", " # .with_remote_executor(executors.MultiProcessingExecutor(max_tasks=5))\n", - " .with_remote_executor(\n", - " h_ray.RayTaskExecutor()\n", - " ) # be sure to run ray.init() or pass in config.\n", + " .with_remote_executor(h_ray.RayTaskExecutor()) # be sure to run ray.init() or pass in config.\n", " .build()\n", ")\n", "dag = dr.display_all_functions()\n", @@ -104,24 +107,25 @@ "\n", "ray.shutdown()\n", "dag" - ], - "metadata": { - "collapsed": false - }, - "id": "a4df6e50283f68ab" + ] }, { "cell_type": 
"markdown", - "source": [ - "# Dask" - ], + "id": "46aa4763a337dcb1", "metadata": { "collapsed": false }, - "id": "46aa4763a337dcb1" + "source": [ + "# Dask" + ] }, { "cell_type": "code", + "execution_count": null, + "id": "103824eec22810fe", + "metadata": { + "collapsed": false + }, "outputs": [], "source": [ "import logging\n", @@ -162,11 +166,7 @@ "\n", "client.shutdown()\n", "dag" - ], - "metadata": { - "collapsed": false - }, - "id": "103824eec22810fe" + ] } ], "metadata": { diff --git a/contrib/hamilton/contrib/dagworks/text_summarization/test.ipynb b/contrib/hamilton/contrib/dagworks/text_summarization/test.ipynb index fa9ce029e..06f46e867 100644 --- a/contrib/hamilton/contrib/dagworks/text_summarization/test.ipynb +++ b/contrib/hamilton/contrib/dagworks/text_summarization/test.ipynb @@ -23,7 +23,6 @@ ], "source": [ "import os\n", - "import hamilton\n", "\n", "os.getcwd()" ] diff --git a/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.ipynb b/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.ipynb index 8b42ecd21..42c771faa 100644 --- a/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.ipynb +++ b/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.ipynb @@ -23,7 +23,6 @@ ], "source": [ "import os\n", - "import hamilton\n", "\n", "os.getcwd()" ] @@ -66,7 +65,6 @@ } ], "source": [ - "\n", "print(translate_to_hamilton.user_prompt(\"a = b + c\"))" ] }, diff --git a/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.py b/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.py index 6fcf816b9..3a3478aa5 100644 --- a/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.py +++ b/contrib/hamilton/contrib/dagworks/translate_to_hamilton/test.py @@ -46,9 +46,7 @@ def a(b: float, c: float) -> float: This Hamilton setup assumes that `b` and `c` are provided to the framework as inputs. 
If `b` and `c` were to be computed by other functions within the Hamilton framework or came from some form of data loading functions, those functions would need to be defined in `functions.py` as well, with appropriate signatures. ''' expected = [ - "def a(b: float, c: float) -> float:\n" - ' """Adds b and c to get a."""\n' - " return b + c\n", + 'def a(b: float, c: float) -> float:\n """Adds b and c to get a."""\n return b + c\n', "from hamilton import driver\n" "import functions\n" "\n" diff --git a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py index a038698dc..50f7ae55a 100644 --- a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py +++ b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/__init__.py @@ -105,8 +105,7 @@ def snli_dataset(download_path: str = "data") -> pd.DataFrame: file_name = "snli_1.0.zip" # Write the downloaded file into "snli_1.0.zip" with open(file_name, "wb") as fd: - for chunk in response.iter_content(chunk_size=1024): - fd.write(chunk) + fd.writelines(response.iter_content(chunk_size=1024)) # Create a ZipFile Object with zipfile.ZipFile(file_name) as zip_file: # Extract all the contents of zip file in current directory diff --git a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/notebook.ipynb b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/notebook.ipynb index c95fd6e7b..489850b6f 100644 --- a/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/notebook.ipynb +++ b/contrib/hamilton/contrib/user/skrawcz/customize_embeddings/notebook.ipynb @@ -28,15 +28,16 @@ } ], "source": [ - "import __init__ as customize_embeddings\n", "import logging\n", "import sys\n", "\n", + "import __init__ as customize_embeddings\n", + "\n", "logging.basicConfig(level=logging.INFO, stream=sys.stdout)\n", "\n", "from hamilton import driver\n", "\n", - "dr = 
(driver.Builder().with_modules(customize_embeddings).with_config({\"source\": \"snli\"}).build())" + "dr = driver.Builder().with_modules(customize_embeddings).with_config({\"source\": \"snli\"}).build()" ] }, { @@ -731,7 +732,9 @@ } ], "source": [ - "dr.display_all_functions(deduplicate_inputs=True, graphviz_kwargs=dict(graph_attr=dict(size=\"20,10\")))" + "dr.display_all_functions(\n", + " deduplicate_inputs=True, graphviz_kwargs=dict(graph_attr=dict(size=\"20,10\"))\n", + ")" ] }, { @@ -843,10 +846,7 @@ " \"customized_embeddings_dataframe\",\n", " \"customized_dataset_histogram\",\n", "]\n", - "result = dr.execute(\n", - " outputs,\n", - " inputs={}\n", - ")" + "result = dr.execute(outputs, inputs={})" ] }, { @@ -962,7 +962,7 @@ 0.09564363378265606, 0.03484310323231943, 0.17715313729736415, - 0.13624637822330365, + 0.13624637822330363, 0.05070200141084402, 0.15148975108715468, -2.220446049250313e-16, @@ -975,7 +975,7 @@ 0.1797729540748142, 0.0717376913271538, 0.13687217788763806, - 0.09128900546836871, + 0.09128900546836873, 0.07439240549476456, 0.05679370213731971, 0.08637151617735939, @@ -983,14 +983,14 @@ 0.16045645026072086, 0.17849276567983263, 0.03449848166822267, - 0.054707974693142924, + 0.05470797469314293, 0.04923006262601248, 0.07794553320765862, 0.08767154442428926, 0.18676883071646477, 0.0693443990429895, 0.1547463914854933, - 0.17024046548731087, + 0.1702404654873109, 0.1036868993235629, 0.2187435024767047, 0.03327658011633905, @@ -999,7 +999,7 @@ 0.16665734391539044, 0.10951298865768744, 0.031001771088611907, - 0.026414138706981838, + 0.02641413870698184, -2.220446049250313e-16, 0.1392095183476293, 0.13212624951835472, @@ -1015,7 +1015,7 @@ 0.021152406169620885, 0.1372772961159503, 0.2329851400641052, - 0.18997724755119494, + 0.18997724755119497, 0.11289429009802188, 0.12832037008138886, 0.07695127114854838, @@ -1029,14 +1029,14 @@ 0.04562690384856394, 0.26068268281869034, 0.19358936446818387, - 0.11994851525150607, - 0.18879313041077495, + 
0.11994851525150609, + 0.18879313041077497, 0.12873534720029234, 0.026249114316675093, - 0.12076995086930853, + 0.12076995086930851, 0.1161823585581937, - 0.11231967702490231, - 0.22343755521428021, + 0.11231967702490232, + 0.2234375552142802, 0.16904935881220062, 0.12688865133503358, 0.14416566117077312, @@ -1045,15 +1045,15 @@ 0.0803966413915469, 0.20281514485035412, 0.1715451595023625, - 0.15390348958543199, + 0.153903489585432, 0.0690587061985578, 0.1026295440785423, 0.15162932998484935, 0.03821940855326522, 0.0743203026586976, - 0.10144630956989287, + 0.10144630956989288, 0.11636349790119171, - 0.19724954583484078, + 0.1972495458348408, 0.11852340050276644, 0.17289517661879072, 0.11680987728590864, @@ -1063,7 +1063,7 @@ 0.09029265717654578, 0.0496323091232701, 0.24535432444432603, - 0.11196587565213911, + 0.11196587565213913, 0.1650446280046639, 0.1702818193106721, 0.11998249182982534, @@ -1074,11 +1074,11 @@ 0.12444359888015744, 0.12208825950258095, 0.1609180517944303, - 0.043810506712729236, + 0.04381050671272923, 0.10711265746148624, 0.1296927342647397, 0.05609514352536582, - 0.15550705431037581, + 0.1555070543103758, 0.08504094156307407, 0.1446068822258456, 0.10161725172996028, @@ -1090,7 +1090,7 @@ 0.12172821773930909, 0.18005404584354867, 0.14834011009814385, - 0.09181934767412725, + 0.09181934767412724, 0.1817446523144044, 0.2100467920423027, 0.14990063079928895, @@ -1098,7 +1098,7 @@ 0.1421035951006996, 0.3083564958352415, 0.2165190216835181, - 0.11378450701005793, + 0.11378450701005792, 0.1706296950803623, 0.1917916425687839, 0.06794890208894588, @@ -1115,21 +1115,21 @@ 0.14086326785219283, 0.010509084102219513, 0.30829089077451866, - 0.09395395634072001, - 0.09631972104587805, + 0.09395395634072, + 0.09631972104587803, 0.1015283303726654, 0.04409844839991928, 0.17979086797834054, - 0.22591784218744426, - 0.15935167124104765, - 0.20405181719331522, + 0.22591784218744423, + 0.15935167124104763, + 0.2040518171933152, 0.0841957669740856, - 
0.10091507771201269, + 0.10091507771201268, 0.1955038119814999, - 0.22660233091323034, - 0.12077783575091539, + 0.22660233091323032, + 0.1207778357509154, 0.22845325390755788, - 0.10793273087155797, + 0.10793273087155796, 0.239959161638428, 0.1759210176885202, 0.05739234542776839, @@ -1144,30 +1144,30 @@ 0.1723089070514805, 0.12033690714055234, 0.2363829160694092, - 0.09101429877784817, + 0.09101429877784815, 0.1354570386235503, 0.18198375740943973, 0.1168670507355537, 0.09965382769533404, 0.12972911375173146, - 0.19219902474082062, + 0.19219902474082065, 0.09532001156310543, 0.13261338791344268, 0.14267389441340095, 0.08879407159072839, 0.03462022051302027, - 0.12113825634483999, + 0.12113825634484, 0.25815087158629213, 0.1861804408091804, 0.1426664923066605, 0.19393864641794123, 0.22902773003623145, - 0.22146460157432002, + 0.22146460157432, 0.16020412776129567, 0.13541527643806606, 0.09456053887553328, 0.15276454972214992, - 0.09549955456140691, + 0.09549955456140692, 0.218960090817662, 0.01053196752826202, 0.15466081845124913, @@ -1176,19 +1176,19 @@ 0.09072500199416644, 0.1846979453740647, 0.13174745135122623, - 0.18216352392718183, + 0.18216352392718185, 0.27401125174648233, - 0.22791674243943538, + 0.22791674243943535, 0.03413168541064371, 0.0828790105635484, 0.04065590197310054, 0.05467201873951921, - 0.20182020182030014, + 0.20182020182030017, 0.25846110452250104, 0.06269648041256493, 0.1438079576021306, 0.07507886575585032, - 0.23090069130639934, + 0.23090069130639937, 0.07561656102502634, 0.09824826169749346, 0.0216629576722821, @@ -1200,12 +1200,12 @@ 0.16388145889215877, 0.09426240313609369, 0.11709877845797712, - 0.18504714058428207, + 0.1850471405842821, 0.09216264071675206, - 0.09774628735226287, + 0.09774628735226289, 0.09195400555293398, 0.14593301509555345, - 0.09611925913806807, + 0.09611925913806808, 0.09339701119792965, 0.144389685123949, 0.15165255936615085, @@ -1232,9 +1232,9 @@ 0.04237835073480056, 0.21351564933920308, 0.10632937574965118, 
- 0.20291816729697854, + 0.20291816729697856, 0.18536880885366935, - 0.09845769994789921, + 0.0984576999478992, 0.2581334889656396, 0.16938938299781003, 0.1180028503651207, @@ -1246,7 +1246,7 @@ 0.13880092506853026, 0.1743626344738104, 0.1687888738296478, - 0.10381419194316999, + 0.10381419194317, 0.18340460437050565, 0.2021219749289681, 0.22080418229128584, @@ -1255,15 +1255,15 @@ 0.07633010160470877, 0.3798284146967028, 0.2125978276559114, - 0.09484732346128721, - 0.21060827338235222, + 0.0948473234612872, + 0.21060827338235225, 0.05897539587126399, 0.13522372919561887, - 0.10075208685592063, + 0.10075208685592064, 0.1932652964126188, 0.02072873300262079, - 0.09282390483177883, - 0.15041244316720281, + 0.09282390483177884, + 0.1504124431672028, 0.22512344995305344, 0.08025755079139463, 0.15748408467500974, @@ -1283,12 +1283,12 @@ 0.020172078363185908, 0.01126124392586414, 0.1677680660333558, - 0.12372873950105301, + 0.123728739501053, 0.08493736357196502, 0.092787812464731, 0.1473883219935458, 0.06758996665502492, - 0.10801594590261443, + 0.10801594590261444, 0.04442409614797693, 0.0802884495897721, 0.10032749765352078, @@ -1296,18 +1296,18 @@ 0.25325920229115995, 0.06703091186761179, 0.24924833466807383, - 0.11957758167450949, + 0.11957758167450948, 0.13561380956192903, 0.1248624486492681, 0.16629570212684142, 0.21796308933128716, 0.253664370309034, - 0.12320431312306701, + 0.123204313123067, 0.19601114115821827, 0.17318519509722263, - 0.24137318066246438, + 0.2413731806624644, 0.06821903240416782, - 0.18412301232530026, + 0.18412301232530023, 0.16612217170736854, 0.08209798132603685, 0.2275462251747713, @@ -1325,9 +1325,9 @@ 0.14732992470356532, 0.03847769135756396, 0.11056626398590064, - 0.24091771414175867, + 0.2409177141417587, 0.14256226023452512, - 0.10401757891347063, + 0.10401757891347065, 0.23089642115939857, 0.048804988838712426, 0.08394260736387049, @@ -1336,7 +1336,7 @@ 0.0954733757546361, 0.16212122352656022, 0.033580686114896396, - 
0.21325779616933838, + 0.2132577961693384, 0.2697966787449494, 0.18418259666373304, 0.20010890017895555, @@ -1345,20 +1345,20 @@ 0.24702607226003503, 0.14679797213957135, 0.13879229985224983, - 0.0030829588408693986, + 0.003082958840869398, 0.14776993221505674, - 0.029214703605656478, + 0.02921470360565648, 0.19337694062554733, 0.1244055399125904, 0.1590994849990286, 0.17425746931492947, 0.1681486282425101, - 0.10428849617906055, - 0.18957152661696874, + 0.10428849617906057, + 0.18957152661696872, 0.1076121395819084, 0.16530583966056422, 0.02129621004085469, - 0.22856071811586998, + 0.22856071811587, 0.15870590563342246, 0.17341213174097225, 0.046517658287295816, @@ -1369,13 +1369,13 @@ 0.24470149023925203, 0.08883859512505088, 0.19447314838723984, - 0.24432862499595154, + 0.2443286249959515, 0.135723477675178, - 0.12336310331523681, + 0.1233631033152368, 0.04252090760881422, 0.09216339050071398, 0.10170322839444867, - 0.09367407225078139, + 0.0936740722507814, 0.1976806754624374, 0.07270900428227878, 0.284384924034784, @@ -1391,18 +1391,18 @@ 0.3419376864622429, 0.08442702035692684, 0.11945512451235796, - 0.20749232040394938, - 0.11289904389607175, - 0.17966678497231459, + 0.20749232040394935, + 0.11289904389607176, + 0.1796667849723146, 0.08244206285889033, 0.04659815838986947, -2.220446049250313e-16, 0.08214508249627384, - 0.22414342038838686, + 0.2241434203883869, 0.2240799146075828, 0.1633621481531632, 0.08890052013590077, - 0.18277953628489763, + 0.18277953628489765, 0.016481756171637274, 0.1685835851265406, 0.2533735983649834, @@ -1411,23 +1411,23 @@ 0.16744338400724712, 0.0824236505815732, 0.12682874602139593, - 0.19041180746582254, + 0.19041180746582256, 0.06949507261747245, 0.22664217025962308, 0.24505653161443852, - 0.16302618235287603, - 0.11830093812484077, - 0.21410553048321246, - 0.17773183939229487, + 0.16302618235287605, + 0.11830093812484076, + 0.2141055304832125, + 0.1777318393922949, 0.26821409507346705, - 0.17269860181789753, + 
0.1726986018178975, 0.08016957367853594, 0.22752190362787472, 0.07326440779092724, - 0.039023713345712197, + 0.0390237133457122, 0.1653898928176314, 0.1956771208742465, - 0.11360628930848449, + 0.11360628930848447, 0.08211476755999136, 0.311703009533833 ], @@ -1453,22 +1453,22 @@ "type": "histogram", "x": [ 0.08651136271082727, - 0.12132598642078063, + 0.12132598642078064, 0.0844214181027001, - 0.19592189350391798, + 0.195921893503918, 0.16752052173260423, 0.14999268232824747, 0.18105859236632704, 0.22637753370896252, 0.05491953482424072, - 0.10729012335622379, + 0.1072901233562238, 0.0881185337305157, 0.13673885405023478, - 0.09343026209400629, + 0.09343026209400628, 0.128707786599341, 0.17358805251806375, 0.07911479306904967, - 0.21165472910077265, + 0.21165472910077263, 0.06319797646424152, 0.21849523359940448, 0.2283514229371013, @@ -1492,12 +1492,12 @@ 0.2235347459718151, 0.13147266523700674, 0.16866924914715375, - 0.057827710323293036, + 0.05782771032329304, 0.12846739188098744, 0.1661172408198971, 0.007720042538432836, 0.02738887950069746, - 0.12248825279438003, + 0.12248825279438004, 0.06541038167362256, 0.2564808858250732, 0.13518604052740713, @@ -1507,7 +1507,7 @@ 0.09392719055117926, 0.12793001952873462, 0.25007459016169176, - 0.12123546172761179, + 0.1212354617276118, 0.005065307479738879, 0.10999798297572971, 0.07093666239090413, @@ -1515,7 +1515,7 @@ 0.22573428002474405, 0.1351957401896967, 0.17142628319134523, - 0.014472688716724247, + 0.014472688716724249, 0.06023647808714805, 0.1600973947044807, 0.21310671177884444, @@ -1543,7 +1543,7 @@ 0.2763085931200111, 0.15688461495176964, 0.039668037624959185, - 0.13782226921709007, + 0.1378222692170901, 0.06750836881548561, 0.10535565091609522, 0.07205263728655675, @@ -1561,33 +1561,33 @@ 0.164416453972567, 0.04108745779271816, 0.1538285351663179, - 0.23173784181932522, + 0.2317378418193252, 0.15159948456589856, 0.07968651973401097, 0.06714566399915223, 0.2616924823593827, 0.08278212897103332, 
0.16074777602545742, - 0.09355292795624437, + 0.09355292795624436, 0.11770198888585892, 0.04398875511550138, - 0.09914621432627047, + 0.09914621432627048, 0.09853980579801602, 0.19033565743687175, 0.18053102454413728, 0.16521963079215696, 0.06324334524679232, 0.16280925378350697, - 0.10033557558453987, + 0.10033557558453989, 0.168445571838709, - 0.11504767397788163, + 0.11504767397788164, 0.13225619006127098, - 0.20890462643103158, + 0.2089046264310316, 0.14771096615564738, 0.06957648597664634, 0.17063726780216737, 0.049406895759433245, - 0.20206018396887826, + 0.20206018396887823, 0.13417443760984282, 0.03884738655771314, 0.04165345303588874, @@ -1596,29 +1596,29 @@ 0.14974012688455618, 0.13659301756933862, 0.15613837016439336, - 0.17458213691348345, + 0.17458213691348343, 0.09997216442244972, 0.048263881795574504, 0.0772960801235354, - 0.17273460421570785, + 0.17273460421570783, 0.1330853279177493, - 0.09705162399064571, + 0.09705162399064572, 0.06132796176109323, - 0.11282385255012317, + 0.11282385255012316, 0.03806688899819288, 0.13698934256591, 0.20248268532728964, 0.05477467889022858, - 0.10444587729675825, + 0.10444587729675824, 0.06487317325265252, - 0.16444301166032627, + 0.1644430116603263, 0.06478198992785145, 0.12520969787734937, 0.15290982814544296, 0.1463113346379884, - 0.21632111528228826, + 0.21632111528228823, 0.022445201333687437, - 0.21423249849873538, + 0.21423249849873535, 0.06564322660042987, 0.2672781389384541, 0.246859873611612, @@ -1629,12 +1629,12 @@ 0.1697180623293797, 0.23671768066950083, 0.1671679284335712, - 0.027683805583460286, + 0.02768380558346029, 0.09552421332612804, 0.09309370604530898, 0.13202903373441932, 0.1186683477504018, - 0.12223100310773483, + 0.12223100310773484, 0.07384527681113906, 0.037638343583392575, 0.08187505657714667, @@ -1653,16 +1653,16 @@ 0.05390838068347936, 0.15723894587212028, 0.11755968199530298, - 0.20052669268377998, + 0.20052669268378, 0.15672065103943889, - 0.09020676360024837, + 0.09020676360024836, 
0.12392708963637789, - 0.12220984571428539, + 0.1222098457142854, 0.13946633983648526, 0.06298889934554419, - 0.11110031419000999, + 0.11110031419001, 0.15650194342747337, - 0.09542264673480749, + 0.09542264673480748, 0.109049919783194, 0.1356007616171282, 0.11547953944862632, @@ -1673,10 +1673,10 @@ 0.16336094377002897, 0.20763961189178093, 0.07849382798834958, - 0.09210433839373477, + 0.09210433839373476, 0.1433476857577035, 0.25659718604790016, - 0.19419917738370263, + 0.19419917738370265, 0.07379749555485338, 0.12021879826532078, 0.19178965343310128, @@ -1688,13 +1688,13 @@ 0.0331875775041387, 0.22347232626872493, 0.28073262198232374, - 0.12029914463003033, + 0.12029914463003032, 0.0703818779136891, - 0.12035565716479435, + 0.12035565716479436, 0.030598068526143662, - 0.11959586571282221, + 0.1195958657128222, 0.1525476988818587, - 0.036448752206815316, + 0.03644875220681531, 0.1956239922567915, 0.12908211309789408, 0.20426007023269743, @@ -1712,18 +1712,18 @@ 0.23741592635554265, 0.2043784945986591, 0.10989027182893418, - 0.22227031565844502, + 0.22227031565844504, 0.1047955673630665, 0.09023250096012991, - 0.11326378886786181, + 0.1132637888678618, 0.16487661474326065, 0.29859068516470566, 0.2583831089679619, - 0.19370950768716066, + 0.19370950768716064, 0.0713804567981089, 0.14211753267283878, 0.188856684095416, - 0.10886103670737701, + 0.108861036707377, 0.27811932129704886, 0.17596272781348554, 0.08240963422506153, @@ -1732,7 +1732,7 @@ 0.11379985299974604, 0.0171268287983799, 0.07371039854612282, - 0.18563654008220853, + 0.1856365400822085, 0.17981443880239012, 0.08822567743719079, 0.08162035490654462, @@ -1747,7 +1747,7 @@ 0.21478439513926484, 0.19646248741381123, 0.07821215128366565, - 0.20780425592479346, + 0.20780425592479343, 0.09249162937231192, 0.28689099533842355, 0.1161448709421059, @@ -1757,9 +1757,9 @@ 0.07736159027924139, 0.05966912555724413, 0.15001131786961974, - 0.10585341189074371, + 0.10585341189074372, 0.10582738276760228, - 
0.09098380974616427, + 0.09098380974616428, 0.0719294575335977, 0.13131370560472244, 0.18649261003881612, @@ -1774,7 +1774,7 @@ 0.13761038712218188, 0.15981819072305448, 0.17356202704496293, - 0.10624006240251183, + 0.10624006240251184, 0.13035911585362725, 0.14258395958477044, 0.0734410147004605, @@ -1785,19 +1785,19 @@ 0.1397744846029999, 0.04971021136490439, 0.1435311604190619, - 0.039094150656551196, + 0.0390941506565512, 0.19580147543383652, 0.06534317597853578, 0.17182525976811203, 0.10180602866377098, 0.08538393307346404, - 0.11667073798361793, + 0.11667073798361792, 0.15710415056251648, 0.19229885432050953, 0.06571992222485457, 0.13982591213826034, 0.004074359593135957, - 0.09744062252267649, + 0.09744062252267648, 0.17169211070926915, 0.04701868975669021, 0.17004896948473835, @@ -1808,7 +1808,7 @@ 0.018245710752253563, 0.07600390063050799, 0.1342017940823491, - 0.22633791573627526, + 0.2263379157362753, 0.10918513534276009, 0.02002898490283056, 0.1255779702901112, @@ -1819,16 +1819,16 @@ 0.1706623852275576, 0.10127196421739948, 0.15437181098721875, - 0.10918155794887219, + 0.1091815579488722, 0.029567544375147747, 0.13728858614166128, 0.03540563430568833, - 0.23277338909964806, + 0.2327733890996481, 0.04025058818469973, 0.03125610031538617, 0.12324222666113982, - 0.09845241432222651, - 0.22178913351105833, + 0.09845241432222653, + 0.2217891335110583, 0.06418341282196882, 0.05088177728888765, 0.05229274282883378, @@ -1839,12 +1839,12 @@ 0.16004651840295225, 0.05217002879257293, 0.15492233192206728, - 0.11861684030477859, + 0.1186168403047786, 0.08931366905428273, 0.13480744109655873, 0.1726206501048393, 0.3171258870190208, - 0.20026410299990982, + 0.20026410299990985, 0.05386062521254398, 0.22971173531775824, 0.13954186873181162, @@ -1861,7 +1861,7 @@ 0.04245902550470815, 0.11973064055467086, 0.17360665965983624, - 0.09305789068911563, + 0.09305789068911564, 0.18359474734370151, 0.13729299714393173, 0.2477593284933164, @@ -1871,19 +1871,19 @@ 
0.17857459859055236, 0.17203499236502962, 0.04899922976555371, - 0.20675462609225526, + 0.20675462609225528, 0.2131010448014805, 0.02123037314658105, - 0.20755781136230478, + 0.2075578113623048, 0.11547953944862632, - 0.19167091042891082, + 0.1916709104289108, 0.10172625588078144, 0.13567109397735733, 0.14385907732922487, - 0.21012899304750798, + 0.210128993047508, 0.12491838509303888, 0.05504502006331646, - 0.22439608284650758, + 0.2243960828465076, 0.1099391262777194, 0.15409470717881824, 0.15869341710098617, @@ -1891,14 +1891,14 @@ 0.21800279769894337, 0.2513541248428206, 0.16757660402700914, - 0.09708530694050621, - 0.09505377967378459, + 0.0970853069405062, + 0.0950537796737846, 0.056184803892081914, 0.015772487444837613, 0.08912962631447552, 0.08558444329601489, 0.016916742264686757, - 0.20950958818449505, + 0.20950958818449503, 0.14953239349439007, 0.1421851192366841, 0.07144145134973423, @@ -1910,12 +1910,12 @@ 0.16017914638944475, 0.08734177843213509, 0.1595132249743999, - 0.13715357749935841, + 0.1371535774993584, 0.2481577497280123, 0.15084407517345, 0.13726146030743358, 0.05211707457779857, - 0.19318644303199062, + 0.19318644303199065, 0.2785970772595636, 0.06696286578745181, 0.19199731420798927, @@ -1926,9 +1926,9 @@ 0.06571073327758825, 0.14832819493216465, 0.12143524817252016, - 0.24057020058214174, + 0.24057020058214176, 0.13439134944907005, - 0.10971950458923141, + 0.1097195045892314, 0.12690346580820144, 0.1026094640265639, 0.18992168346073668, @@ -1936,22 +1936,22 @@ 0.12446496373651128, 0.22238436669144956, 0.17606910917063712, - 0.11887404763268161, + 0.1188740476326816, 0.11591468529632898, 0.13779443180558537, 0.21119383517384815, 0.14821698917885773, 0.1864089569323435, - 0.13365239520917493, + 0.1336523952091749, 0.10064793008206674, 0.10606542694444632, 0.126244173281434, 0.07616623366746345, - 0.09139102810332611, - 0.10056735194822031, + 0.09139102810332612, + 0.10056735194822032, 0.1333938355456843, 0.15311734480310857, - 
0.09399619852425323 + 0.09399619852425324 ], "xaxis": "x", "yaxis": "y" @@ -2005,12 +2005,12 @@ 0.2097698420382722, 0.4281417611743731, 0.2428128133029972, - 0.18580463686169402, + 0.185804636861694, 0.31226163702575804, 0.3402828834410987, 0.3966111193262125, 0.2705690658296297, - 0.24963250750450894, + 0.24963250750450897, 0.20974519573366113, 0.32388995060658554, 0.19126292874874828, @@ -2022,7 +2022,7 @@ 0.3434950815145402, 0.22089526576046725, 0.2949241941824077, - 0.24679450721409057, + 0.24679450721409055, 0.28119760761856527, 0.2581710635790445, 0.40736104657455496, @@ -2052,7 +2052,7 @@ 0.227975454441107, 0.22304208438393025, 0.2674091777006654, - 0.21239847608919626, + 0.21239847608919624, 0.36585831166676874, 0.3130357795711102, 0.24279404866709753, @@ -2065,7 +2065,7 @@ 0.1910255640894023, 0.30377305440730684, 0.2231174228553916, - 0.40141547752097595, + 0.4014154775209759, 0.27180277839235967, 0.2824017051028933, 0.2612429614019959, @@ -2090,11 +2090,11 @@ 0.18964777022305712, 0.26687166781966265, 0.25108289073736056, - 0.20252697616669946, + 0.20252697616669943, 0.33956834543633085, 0.31231756638393493, 0.2584482795689169, - 0.24129506818111146, + 0.24129506818111143, 0.2318345595820095, 0.3240519063172087, 0.3309830582109461, @@ -2114,21 +2114,21 @@ 0.28633674998103376, 0.20904601891216845, 0.2301428186155806, - 0.24337316747847237, + 0.24337316747847235, 0.24231123163256785, 0.2888298152598543, 0.22776226155403367, 0.1541341763882469, - 0.22085374863221519, + 0.2208537486322152, 0.2468135580246693, 0.25289115607985513, 0.38431102601858624, 0.30715956733445815, - 0.24104356390298753, + 0.2410435639029875, 0.2267583969908723, 0.2381382894954185, 0.28539831592053666, - 0.23884853523495087, + 0.2388485352349509, 0.34669097320902564, 0.29016314080997896, 0.3320165815559919, @@ -2141,7 +2141,7 @@ 0.25534539512628485, 0.2870174648275907, 0.28832018547682514, - 0.23369258335490162, + 0.2336925833549016, 0.2781916829457618, 0.3017366856514043, 
0.3319791105288501, @@ -2182,7 +2182,7 @@ 0.38424465946921726, 0.26748454905712615, 0.28245964508963417, - 0.24178666788585979, + 0.2417866678858598, 0.2503896186887492, 0.3267647097586658, 0.300343913484172, @@ -2206,12 +2206,12 @@ 0.32096085130338337, 0.32512954461156707, 0.2669840310062094, - 0.22051524527760058, + 0.22051524527760055, 0.30200543631879706, 0.3427015436226163, 0.2467294656017932, 0.35108711778945334, - 0.18586637037927578, + 0.18586637037927575, 0.2881597986384137, 0.2933190982753042, 0.3254155625762867, @@ -2246,16 +2246,16 @@ 0.25262697026700087, 0.3489847771421837, 0.30142553819005835, - 0.20527378814548958, + 0.2052737881454896, 0.3171147872169089, - 0.20635065264812158, - 0.20967470734073246, + 0.2063506526481216, + 0.20967470734073249, 0.24413489934298693, 0.27842658080265537, 0.3099832531209421, - 0.23731198287828714, - 0.10241682022097987, - 0.24026253413403642, + 0.23731198287828711, + 0.10241682022097988, + 0.2402625341340364, 0.24104133313611287, 0.32950453157087845, 0.27900141072006635, @@ -2296,11 +2296,11 @@ 0.421678589593213, 0.2805755505376386, 0.16459576146862576, - 0.23688871059033634, + 0.2368887105903363, 0.31595949576362103, 0.30887035680672825, 0.2736204974535239, - 0.24528324954746938, + 0.24528324954746936, 0.2630239487627848, 0.28688827579283727, 0.29526510824968666, @@ -2325,7 +2325,7 @@ 0.2694634803888598, 0.28274106616297545, 0.23815763062381567, - 0.37884883934428437, + 0.3788488393442844, 0.2881171820519093, 0.31319362706518117, 0.26280894851242387, @@ -2367,7 +2367,7 @@ 0.2782813399225822, 0.28859327790879097, 0.360769486632474, - 0.39708400798247123, + 0.3970840079824712, 0.36831370025164933, 0.2043861156321678, 0.2590649528026232, @@ -2404,7 +2404,7 @@ 0.2954808501661056, 0.2895695659980635, 0.27191433320688996, - 0.20589395184660297, + 0.20589395184660295, 0.21574499583635065, 0.2654778998530678, 0.25100488933390286, @@ -2417,8 +2417,8 @@ 0.23032540174604144, 0.27236630560517316, 0.24185382409222655, - 
0.23281899893447122, - 0.12151055628611673, + 0.2328189989344712, + 0.12151055628611672, 0.24212730028297635, 0.2821038300752966, 0.323737840709341, @@ -2437,13 +2437,13 @@ 0.23876907149165305, 0.3063915830561661, 0.23389340263260228, - 0.19776205823211435, + 0.19776205823211437, 0.24304820815235773, 0.21242365894885384, 0.32336710754440845, 0.3504322170058809, 0.3148246778368339, - 0.22296582934563158, + 0.2229658293456316, 0.26202032031903133, 0.23757683735594937, 0.2339709631353004, @@ -2459,7 +2459,7 @@ 0.323438814945052, 0.2237012910105074, 0.2880876311859303, - 0.21283746380596014, + 0.21283746380596016, 0.3235533362804349, 0.299860007720687, 0.2506389063725072, @@ -2545,7 +2545,7 @@ 0.3426925075938775, 0.32440118043801747, 0.2449687856622853, - 0.23193574275415718, + 0.2319357427541572, 0.35817938959738493, 0.3185250591493597, 0.2451084200316367, @@ -2556,9 +2556,9 @@ 0.275212055962471, 0.2865756419701765, 0.140279297529195, - 0.21907435555908106, + 0.21907435555908103, 0.30881984083976366, - 0.19036186880198502, + 0.19036186880198505, 0.33256440092314876, 0.19794935117802903, 0.26243338030498053, @@ -2570,7 +2570,7 @@ 0.4119106675437634, 0.3105067966164018, 0.3835317022966309, - 0.24925779785611935, + 0.24925779785611937, 0.3226550336424008, 0.2743340688443272, 0.32573147771983957, @@ -2583,7 +2583,7 @@ 0.30602734910217055, 0.2193306740826999, 0.1724300442981399, - 0.22351806103535055, + 0.22351806103535057, 0.21049804746058687, 0.19965258218566972, 0.2608662151215433, @@ -2599,26 +2599,26 @@ 0.2665821777519283, 0.23834690847481987, 0.383600167718931, - 0.22071764420221518, + 0.2207176442022152, 0.2702586034750156, 0.31597696014399035, 0.33108548694415163, 0.2456947901139218, 0.2272991585009193, - 0.21355947015967758, + 0.2135594701596776, 0.18822107425202972, 0.30504269519151983, 0.24726298395285817, 0.16269005110618673, 0.261293433878089, - 0.24039370674253502, + 0.24039370674253505, 0.3117702028862739, 0.2507068236588411, 0.26145562665512656, 
0.25388686299903795, - 0.19689405873471366, + 0.1968940587347137, 0.30025606623169987, - 0.20736302799424577, + 0.20736302799424575, 0.327771560266106, 0.2431956846982134, 0.2116863896919171, @@ -2638,8 +2638,8 @@ 0.27449944264649984, 0.2520411593021483, 0.21768823655531433, - 0.20216143176697798, - 0.22079549241268326, + 0.202161431766978, + 0.2207954924126833, 0.21605574876288247, 0.3109840935588294, 0.2611935284882042, @@ -2678,7 +2678,7 @@ 0.23262661901274184, 0.27738733560146256, 0.2752744030183971, - 0.21280916703863118, + 0.2128091670386312, 0.3095537650180844, 0.20985105649589175, 0.16482152948732354, @@ -2714,7 +2714,7 @@ 0.20361592842495835, 0.33543547926799366, 0.3139783008356324, - 0.22878216746449198, + 0.228782167464492, 0.25790839624248374, 0.28014434180677006, 0.22687762574473225, @@ -2731,7 +2731,7 @@ 0.2553622772201015, 0.307820124765514, 0.263143762508833, - 0.23825290690296674, + 0.23825290690296672, 0.208430138887163, 0.30421384071861, 0.23485471219927112, @@ -2747,7 +2747,7 @@ 0.2827942587155936, 0.195396570453098, 0.27988675287275366, - 0.23879920372318586, + 0.23879920372318583, 0.32374871774732905, 0.3143677420191602, 0.35806377169688064, @@ -2758,7 +2758,7 @@ 0.2008656996900251, 0.28853611961168657, 0.2590329904863554, - 0.10237655947502655, + 0.10237655947502657, 0.16450029340673844, 0.2078222796185395, 0.2422433140557153, @@ -2766,7 +2766,7 @@ 0.32683103810177005, 0.2476432259161122, 0.25662316235372107, - 0.24762770689764502, + 0.24762770689764504, 0.3245377815155177, 0.29950422935296095, 0.2846244762501512, @@ -2819,7 +2819,7 @@ 0.18117185550085035, 0.25659989517785975, 0.2844207278723483, - 0.20701438282763363, + 0.20701438282763365, 0.2658054471245427, 0.3787004523953895, 0.31467198963476595, @@ -2835,17 +2835,17 @@ 0.2907992650026523, 0.232639010419313, 0.31087858771131605, - 0.22459249153698402, + 0.224592491536984, 0.25095191526534255, 0.30150798008444346, 0.3222894806798503, 0.23923860821199772, - 0.21677017963210066, + 
0.21677017963210063, 0.2649259157557218, - 0.18930971303017174, + 0.18930971303017177, 0.3071088132933587, - 0.22119138218519374, - 0.22133828599953786, + 0.22119138218519377, + 0.22133828599953784, 0.29775152384277925, 0.24773932492986173, 0.2469317488047863, @@ -2867,12 +2867,12 @@ 0.26960341461044957, 0.24613040529646768, 0.33832714935354136, - 0.21008582617615446, - 0.23039836988761786, + 0.21008582617615448, + 0.23039836988761783, 0.2595567702962094, 0.23672254506881707, 0.2254204014277249, - 0.22552007835969234, + 0.22552007835969232, 0.2345774557606597, 0.2971287028585631, 0.27520165828766796, @@ -2889,7 +2889,7 @@ 0.25763317971535704, 0.32276647385087187, 0.3003604506627905, - 0.24124055075280282, + 0.2412405507528028, 0.2438364465665751, 0.2947733501234783, 0.2648064364207796, @@ -2897,7 +2897,7 @@ 0.27068791921023716, 0.30990649182940744, 0.2156911282979922, - 0.24240775013309146, + 0.24240775013309143, 0.26232815248075836, 0.2824510109001539, 0.2967753030931156, @@ -2958,7 +2958,7 @@ 0.34159758948052055, 0.23222303700970537, 0.28803393542392786, - 0.15182900546064781, + 0.1518290054606478, 0.2608729466852593, 0.2997492036554462, 0.28609122590085545, @@ -2970,11 +2970,11 @@ 0.2617263322302906, 0.311987038297748, 0.28153094215767627, - 0.21942828257916758, + 0.2194282825791676, 0.28959470333599624, 0.30440425476155186, 0.29263278741459475, - 0.23539595753873066, + 0.23539595753873063, 0.2626895163820534, 0.28386549278583717, 0.2575895116800312, @@ -2982,9 +2982,9 @@ 0.24952101292164064, 0.21183217914949803, 0.27669377647914584, - 0.21740697882130122, + 0.2174069788213012, 0.27854204459246246, - 0.20925184562064414, + 0.20925184562064417, 0.33337915866653733, 0.297819346915311, 0.3013526743532917, @@ -5557,7 +5557,7 @@ ], "range": [ 0.8713888888888889, - 0.9436111111111111 + 0.9436111111111112 ], "title": { "text": "accuracy" @@ -5582,7 +5582,7 @@ "matches": "y", "range": [ 0.8713888888888889, - 0.9436111111111111 + 0.9436111111111112 ], "showticklabels": 
false, "type": "linear" @@ -5608,7 +5608,7 @@ "matches": "y", "range": [ 0.8713888888888889, - 0.9436111111111111 + 0.9436111111111112 ], "showticklabels": false, "type": "linear" @@ -5632,7 +5632,7 @@ "matches": "y", "range": [ 0.8713888888888889, - 0.9436111111111111 + 0.9436111111111112 ], "title": { "text": "accuracy" @@ -7296,7 +7296,7 @@ ], "range": [ 0.2872402154737049, - 1.5450774646467633 + 1.545077464646763 ], "title": { "text": "loss" @@ -7321,7 +7321,7 @@ "matches": "y", "range": [ 0.2872402154737049, - 1.5450774646467633 + 1.545077464646763 ], "showticklabels": false, "type": "linear" @@ -7347,7 +7347,7 @@ "matches": "y", "range": [ 0.2872402154737049, - 1.5450774646467633 + 1.545077464646763 ], "showticklabels": false, "type": "linear" @@ -7371,7 +7371,7 @@ "matches": "y", "range": [ 0.2872402154737049, - 1.5450774646467633 + 1.545077464646763 ], "title": { "text": "loss" @@ -7490,7 +7490,7 @@ 0.3025054633617401, 0.6105697751045227, 0.8058550953865051, - 0.46179690957069397, + 0.461796909570694, 0.9096941351890564, 0.8012530207633972, 0.7488439083099365, @@ -7504,7 +7504,7 @@ 0.8776991963386536, 0.897608757019043, 0.5343601107597351, - 0.49749472737312317, + 0.4974947273731232, 0.5951083898544312, 0.48186638951301575, 0.47553735971450806, @@ -7553,7 +7553,7 @@ 0.8744791746139526, 0.3193732500076294, 0.5335767865180969, - 0.41933003067970276, + 0.4193300306797027, 0.6741343140602112, 0.8939380049705505, 0.8922183513641357, @@ -7569,9 +7569,9 @@ 0.5744775533676147, 0.809404730796814, 0.5359721183776855, - 0.9331763982772827, + 0.9331763982772828, 0.6111627221107483, - 0.39885184168815613, + 0.3988518416881562, 0.5987299084663391, 0.5188687443733215, 0.5216705203056335, @@ -7597,7 +7597,7 @@ 0.6003716588020325, 0.5558609366416931, 0.5203385949134827, - 0.43657800555229187, + 0.4365780055522918, 0.7684441804885864, 0.731541097164154, 0.2905101180076599, @@ -7625,7 +7625,7 @@ 0.5337222218513489, 0.6825911998748779, 0.5482040047645569, - 
0.19413185119628906, + 0.19413185119628903, 0.2649698257446289, 0.6861809492111206, 0.6297410130500793, @@ -7648,7 +7648,7 @@ 0.4740462303161621, 0.6392838954925537, 0.7016109228134155, - 0.46477293968200684, + 0.4647729396820069, 0.4744134545326233, 0.5093992352485657, 0.6113561391830444, @@ -7657,7 +7657,7 @@ 0.3816279470920563, 0.5957162380218506, 0.5358200073242188, - 0.49080508947372437, + 0.4908050894737244, 0.8091211915016174, 0.8412520289421082, 0.43756920099258423, @@ -7667,10 +7667,10 @@ 0.7929005026817322, 0.7931212782859802, 0.5746145248413086, - 0.9653676152229309, + 0.9653676152229308, 0.6876810193061829, 0.5946034789085388, - 0.9576044678688049, + 0.9576044678688048, 0.19917243719100952, 0.6899040937423706, 0.6616352796554565, @@ -7685,10 +7685,10 @@ 0.3996334969997406, 0.40564778447151184, 0.5005411505699158, - 0.11771949380636215, + 0.11771949380636217, 0.592231035232544, 0.27257904410362244, - 0.38883107900619507, + 0.388831079006195, 0.8052856922149658, 0.8636611700057983, 0.7598906755447388, @@ -7740,12 +7740,12 @@ 0.6729695200920105, 0.8856930732727051, 0.8621565699577332, - 0.46972373127937317, + 0.4697237312793732, 0.22104378044605255, 0.7239076495170593, 0.5897582173347473, 0.7421569228172302, - 0.40141376852989197, + 0.401413768529892, 0.794511616230011, 0.6842513084411621, 0.9236469864845276, @@ -7757,7 +7757,7 @@ 0.4818022549152374, 0.6475313305854797, 0.6119240522384644, - 0.44683924317359924, + 0.4468392431735992, 0.7293417453765869, 0.5919987559318542, 0.7591642141342163, @@ -7769,7 +7769,7 @@ 0.5095839500427246, 0.46767669916152954, 0.8782474398612976, - 0.9075431823730469, + 0.9075431823730468, 0.11835505813360214, 0.5267267823219299, 0.4562215209007263, @@ -7778,7 +7778,7 @@ 0.7791534066200256, 0.4049462676048279, 0.17458102107048035, - 0.46790391206741333, + 0.4679039120674134, 0.5910201072692871, 0.29316946864128113, 0.6545480489730835, @@ -7787,7 +7787,7 @@ 0.6345252990722656, 0.657397449016571, 0.8343915939331055, - 
0.40935301780700684, + 0.4093530178070069, 0.6739963293075562, 0.34839361906051636, 0.32727324962615967, @@ -7804,7 +7804,7 @@ 0.5332617163658142, 0.5397392511367798, 0.6324774622917175, - 0.40632718801498413, + 0.4063271880149842, 0.3552083671092987, 0.4035123586654663, 0.7392342686653137, @@ -7818,7 +7818,7 @@ 0.505524754524231, 0.6473691463470459, 0.4643056392669678, - 0.9150946140289307, + 0.9150946140289308, 0.6437070965766907, 0.5092566013336182, 0.4279191792011261, @@ -7827,7 +7827,7 @@ 0.6137130260467529, 0.6761367321014404, 0.12356094270944595, - 0.40150579810142517, + 0.4015057981014252, 0.5352786779403687, 0.7268828749656677, 0.2541159689426422, @@ -7837,7 +7837,7 @@ 0.45136895775794983, 0.6897038221359253, 0.5453612804412842, - 0.9120858311653137, + 0.9120858311653136, 0.9534286260604858, 0.38422852754592896, 0.5806923508644104, @@ -7860,7 +7860,7 @@ 0.2645470201969147, 0.1280735582113266, 0.6090183258056641, - 0.47725704312324524, + 0.4772570431232453, 0.5096180438995361, 0.3606683015823364, 0.7877698540687561, @@ -7882,14 +7882,14 @@ 0.5711411833763123, 0.8334606289863586, 0.6680223345756531, - 0.18423649668693542, + 0.18423649668693545, 0.6327370405197144, 0.7210519909858704, 0.41778066754341125, 0.8494082689285278, 0.7317454218864441, 0.16045817732810974, - 0.9194920063018799, + 0.91949200630188, 0.7132930755615234, 0.447072297334671, 0.868039071559906, @@ -7904,7 +7904,7 @@ 0.5457545518875122, 0.987792432308197, 0.6133803129196167, - 0.9041551947593689, + 0.9041551947593688, 0.35303550958633423, 0.6219542622566223, 0.6207598447799683, @@ -7945,7 +7945,7 @@ 0.7479601502418518, 0.5891719460487366, 0.5052285194396973, - -0.012029256671667099, + -0.0120292566716671, 0.7709308862686157, 0.6368439197540283, 0.2824801802635193, @@ -7959,7 +7959,7 @@ 0.2950768768787384, 0.602649450302124, 0.7122810482978821, - 0.38516971468925476, + 0.3851697146892547, 0.9442123770713806, 0.3462761640548706, 0.278072714805603, @@ -7982,7 +7982,7 @@ 0.15773607790470123, 
0.7636581659317017, 0.8616477251052856, - 0.47455793619155884, + 0.4745579361915589, 0.510814905166626, 0.6154623627662659, 0.7775927186012268, @@ -8030,10 +8030,10 @@ 0.2908845543861389, 0.4212500751018524, 0.7124006152153015, - 0.49751684069633484, + 0.4975168406963349, 0.4385144114494324, 0.6140978336334229, - 0.39972764253616333, + 0.3997276425361634, 0.1435338407754898, 0.5215296149253845, 0.5639413595199585, @@ -8046,13 +8046,13 @@ 0.5518397092819214, 0.4903949499130249, 0.7374151945114136, - 0.18291746079921722, + 0.1829174607992172, 0.7056958079338074, 0.5351248979568481, 0.7788393497467041, 0.6267669796943665, 0.5285612940788269, - 0.9709029793739319, + 0.970902979373932, 0.886104166507721, 0.5901898741722107, 0.7488778233528137, @@ -8065,14 +8065,14 @@ 0.6405933499336243, 0.30090323090553284, 0.6647000312805176, - 0.9827840328216553, + 0.9827840328216552, 0.6350420713424683, 0.7284623384475708, 0.42385753989219666, 0.34456077218055725, 0.6297999620437622, 0.4169467091560364, - 0.9607759714126587, + 0.9607759714126588, 0.7366241216659546, 0.4249221980571747, 0.3078641891479492, @@ -8098,7 +8098,7 @@ 0.8586510419845581, 0.807817816734314, 0.24719277024269104, - 0.46559852361679077, + 0.4655985236167908, 0.8525004982948303, 0.49789920449256897, 0.7506644129753113, @@ -8111,7 +8111,7 @@ 0.7279955148696899, 0.7243854999542236, 0.7302038073539734, - 0.044148579239845276, + 0.04414857923984528, 0.1336016058921814, 0.37188753485679626, 0.8146644234657288, @@ -8134,7 +8134,7 @@ 0.4728325605392456, 0.4881300628185272, 0.740663468837738, - 0.45647329092025757, + 0.4564732909202576, 0.6335883140563965, 0.6632585525512695, 0.6120293140411377, @@ -8164,7 +8164,7 @@ 0.6522490978240967, 0.858093798160553, 0.6435357332229614, - 0.18906255066394806, + 0.1890625506639481, 0.7877765893936157, 0.6084040999412537, 0.7688128352165222, @@ -8204,7 +8204,7 @@ 0.4470277726650238, 0.17086978256702423, 0.3000096082687378, - 0.47072145342826843, + 0.4707214534282684, 
0.4743010103702545, 0.6189950704574585, 0.8075470328330994, @@ -8232,7 +8232,7 @@ 0.7457849979400635, 0.6100540161132812, 0.5431568026542664, - 0.22713299095630646, + 0.22713299095630649, 0.4129021465778351, 0.7720144987106323, 0.6626931428909302, @@ -8304,7 +8304,7 @@ 0.4871593415737152, 0.5398567914962769, 0.7605876922607422, - 0.12210622429847717, + 0.12210622429847716, 0.6611909866333008, 0.2299708127975464, 0.6362550258636475, @@ -8330,7 +8330,7 @@ 0.5898472666740417, 0.5297929644584656, 0.5300968885421753, - 0.21481241285800934, + 0.21481241285800937, 0.6401244401931763, 0.42251187562942505, 0.506644070148468, @@ -8343,7 +8343,7 @@ 0.8486484289169312, 0.5219904780387878, 0.8709440231323242, - 0.40816137194633484, + 0.4081613719463349, 0.7856060862541199, 0.5394358038902283, 0.6849074959754944, @@ -8353,7 +8353,7 @@ 0.4930421710014343, 0.7519218921661377, 0.5349624752998352, - 0.9860039353370667, + 0.9860039353370668, 0.6874377727508545, 0.5700061917304993, 0.8226103782653809, @@ -8385,7 +8385,7 @@ 0.8722271919250488, 0.668117105960846, 0.6584043502807617, - 0.41717827320098877, + 0.4171782732009888, 0.7726898193359375, 0.8302673101425171, 0.8429504036903381, @@ -8447,7 +8447,7 @@ 0.6988707780838013, 0.39946040511131287, 0.15337969362735748, - 0.40424078702926636, + 0.4042407870292664, 0.6450180411338806, 0.6802632808685303, 0.8201731443405151, @@ -8468,7 +8468,7 @@ 0.7532283663749695, 0.45349955558776855, 0.49324968457221985, - 0.39067643880844116, + 0.3906764388084411, 0.4413166344165802, 0.593929648399353, 0.7956624627113342, @@ -8489,7 +8489,7 @@ 0.4453200101852417, 0.6673714518547058, 0.48552611470222473, - 0.38263726234436035, + 0.3826372623443603, 0.5510727763175964, 0.2528418302536011, 0.5193676948547363, @@ -8534,36 +8534,36 @@ -0.03668558970093727, -0.02967647835612297, -0.12830156087875366, - 0.15515615046024323, + 0.15515615046024325, 0.10520470887422562, 0.004296391271054745, 0.0953262522816658, 0.14223447442054749, - -0.10897369682788849, - 
0.18912823498249054, + -0.10897369682788847, + 0.18912823498249057, -0.2797534465789795, -0.07693223655223846, -0.1597622185945511, -0.24516843259334564, -0.02439267560839653, -0.1582990139722824, - 0.09865007549524307, + 0.09865007549524309, -0.257803350687027, - -0.10514713078737259, - -0.09709038585424423, - -0.12135279178619385, + -0.1051471307873726, + -0.09709038585424425, + -0.12135279178619383, 0.1258426308631897, 0.3173837661743164, 0.20221276581287384, - -0.054094888269901276, + -0.05409488826990128, -0.15280120074748993, 0.16795380413532257, - -0.022243546321988106, + -0.022243546321988102, 0.2351001799106598, -0.2868553102016449, 0.1764918863773346, 0.2903561294078827, - -0.12004204094409943, + -0.12004204094409944, -0.289218932390213, -0.16049396991729736, 0.0457816906273365, @@ -8571,19 +8571,19 @@ 0.04793287068605423, -0.1836176961660385, 0.2359730452299118, - 0.10301758348941803, + 0.10301758348941804, 0.04663403332233429, 0.3528800904750824, -0.06776353716850281, 0.035028111189603806, -0.20923064649105072, 0.0872136577963829, - -0.12049486488103867, + -0.12049486488103868, 0.19757969677448273, -0.14821933209896088, - 0.22679069638252258, + 0.22679069638252256, -0.193757563829422, - 0.21949312090873718, + 0.2194931209087372, -0.12036675959825516, -0.17021365463733673, -0.10896503925323486, @@ -8596,16 +8596,16 @@ -0.10335858911275864, -0.12295187264680862, -0.1428886502981186, - -0.009412713348865509, + -0.009412713348865507, -0.06056904420256615, - 0.053275417536497116, - -0.10230251401662827, - 0.012817472219467163, + 0.05327541753649712, + -0.10230251401662828, + 0.012817472219467165, -0.0648675188422203, -0.14626444876194, -0.17029376327991486, 0.164293572306633, - -0.19011680781841278, + -0.1901168078184128, -0.02524411864578724, 0.060752104967832565, -0.034422483295202255, @@ -8616,20 +8616,20 @@ 0.16364550590515137, -0.006327771116048098, 0.17553208768367767, - -0.10808877646923065, + -0.10808877646923064, 0.32587167620658875, - 
0.024409949779510498, + 0.024409949779510495, 0.2134270966053009, -0.033544596284627914, 0.2789735496044159, -0.2669259011745453, -0.03977326303720474, - -0.09765448421239853, + -0.09765448421239852, 0.19157880544662476, -0.16466809809207916, 0.2960720658302307, -0.14701977372169495, - 0.024006091058254242, + 0.02400609105825424, 0.5359187722206116, -0.13232533633708954, 0.1298927217721939, @@ -8648,13 +8648,13 @@ -0.08012242615222931, 0.0053380075842142105, 0.18362735211849213, - -0.21329078078269958, + -0.2132907807826996, -0.2345273196697235, 0.0013207707088440657, -0.02117733471095562, 0.15326784551143646, -0.2228553295135498, - -0.20020727813243866, + -0.20020727813243863, -0.02947295270860195, -0.14788372814655304, 0.4352216124534607, @@ -8674,11 +8674,11 @@ 0.07401598244905472, 0.10157324373722076, 0.04711907356977463, - 0.09162778407335281, + 0.0916277840733528, 0.1608046442270279, 0.11008800566196442, 0.005915212910622358, - 7.877714779169764e-06, + 7.877714779169764e-6, -0.18636898696422577, -0.14036914706230164, 0.22726751863956451, @@ -8690,7 +8690,7 @@ -0.09915678948163986, -0.20518849790096283, 0.08790075033903122, - 0.028400132432579994, + 0.02840013243257999, -0.2129179835319519, 0.14262773096561432, 0.10974042117595673, @@ -8712,27 +8712,27 @@ -0.09064263105392456, -0.13136397302150726, 0.11595435440540314, - 0.10625079274177551, + 0.10625079274177553, 0.04720696434378624, - 0.14692085981369019, + 0.1469208598136902, -0.102681465446949, 0.03723100200295448, 0.07055085897445679, -0.051042765378952026, -0.152899369597435, - 0.060371171683073044, - 0.10018381476402283, + 0.06037117168307304, + 0.10018381476402284, -0.19558073580265045, 0.02637379989027977, -0.13171330094337463, -0.1546192169189453, - -0.11873248964548111, - -0.24184353649616241, + -0.11873248964548112, + -0.2418435364961624, 0.13916482031345367, 0.05833540856838226, -0.08269258588552475, -0.17039726674556732, - -0.09904510527849197, + -0.09904510527849196, -0.09211145341396332, 
-0.17118015885353088, -0.1999327540397644, @@ -8741,7 +8741,7 @@ 0.039884451776742935, 0.07918731123209, 0.13738484680652618, - -0.18808403611183167, + -0.1880840361118317, 0.024974266067147255, -0.2702469527721405, -0.10568739473819733, @@ -8760,20 +8760,20 @@ -0.20281249284744263, -0.021253010258078575, 0.26244011521339417, - -0.028162915259599686, - -0.09063762426376343, + -0.02816291525959969, + -0.09063762426376344, -0.05512038990855217, 0.12913590669631958, -0.08988860994577408, -0.1783338189125061, 0.14045585691928864, -0.33523449301719666, - 0.41526293754577637, - 0.012141996994614601, + 0.4152629375457764, + 0.0121419969946146, -0.13590951263904572, -0.18816429376602173, -0.03140139952301979, - -0.10828938335180283, + -0.10828938335180284, 0.08278695493936539, -0.14623720943927765, 0.3382360637187958, @@ -8816,9 +8816,9 @@ 0.1526947021484375, -0.14808838069438934, 0.033112864941358566, - -0.20084600150585175, + -0.20084600150585177, -0.24854212999343872, - -0.10837492346763611, + -0.10837492346763612, -0.056307364255189896, 0.15710487961769104, -0.05363558977842331, @@ -8826,7 +8826,7 @@ 0.08003206551074982, 0.021740082651376724, -0.13676343858242035, - -0.09113214164972305, + -0.09113214164972304, 0.10926882922649384, -0.12845870852470398, 0.10484986752271652, @@ -8843,20 +8843,20 @@ 0.05165703222155571, -0.2543173134326935, -0.3640163540840149, - -0.21098308265209198, + -0.210983082652092, -0.0002943961590062827, -0.1103072389960289, - -0.15707895159721375, - -0.0019281546119600534, + -0.15707895159721377, + -0.0019281546119600537, 0.001089331228286028, -0.05289708450436592, - -0.19868867099285126, + -0.19868867099285129, -0.13268432021141052, 0.26294416189193726, 0.04254290089011192, -0.025100933387875557, 0.04120016098022461, - -0.09672898054122925, + -0.09672898054122923, 0.13869889080524445, 0.12935855984687805, -0.04509389400482178, @@ -8865,19 +8865,19 @@ -0.1062588319182396, -0.025410326197743416, 0.044150713831186295, - -0.11249110847711563, + 
-0.11249110847711564, -0.12096500396728516, -0.1867653727531433, -0.06755836308002472, -0.09656509011983871, -0.08991815894842148, -0.1641472429037094, - -0.18047568202018738, + -0.18047568202018735, -0.1382654309272766, -0.01557458471506834, -0.2560745179653168, 0.2680909335613251, - -0.12489670515060425, + -0.12489670515060423, 0.29391661286354065, -0.07776159048080444, 0.06777409464120865, @@ -8905,11 +8905,11 @@ 0.05331982299685478, -0.07498934864997864, -0.2081444412469864, - 0.030046459287405014, + 0.030046459287405018, 0.17176669836044312, -0.12508916854858398, -0.14103733003139496, - -0.21521301567554474, + -0.2152130156755447, -0.08345188945531845, 0.20666545629501343, 0.16691675782203674, @@ -8927,18 +8927,18 @@ -0.2775830626487732, -0.08571483194828033, 0.2494424432516098, - 0.19296985864639282, + 0.1929698586463928, -0.15098422765731812, -0.06749912351369858, - -0.21805696189403534, - -0.20510393381118774, + -0.21805696189403537, + -0.20510393381118777, 0.03165200352668762, - -0.09657551348209381, + -0.0965755134820938, -0.055464424192905426, -0.1535521298646927, 0.2183075249195099, -0.06677652895450592, - -0.10195451229810715, + -0.10195451229810716, -0.05761260539293289, 0.011987813748419285, -0.019483191892504692, @@ -8958,9 +8958,9 @@ 0.4363161027431488, -0.054133374243974686, 0.04984976351261139, - -0.09680512547492981, + -0.0968051254749298, -0.08825217187404633, - 0.13836230337619781, + 0.1383623033761978, 0.19392959773540497, 0.2577168941497803, -0.16410180926322937, @@ -8978,17 +8978,17 @@ 0.5873444080352783, 0.03705146536231041, -0.10577570647001266, - -0.09227626770734787, + -0.09227626770734788, -0.029516762122511864, -0.10182581841945648, -0.04308932647109032, - -0.20235814154148102, + -0.20235814154148105, -0.15028180181980133, 0.34788036346435547, 0.014004450291395187, -0.041540030390024185, -0.2799775302410126, - -0.13629958033561707, + -0.1362995803356171, -0.04802396148443222, 0.053821079432964325, 0.02863389067351818, @@ -8997,17 
+8997,17 @@ 0.19852471351623535, -0.08113377541303635, -0.02975352853536606, - -0.11160653084516525, + -0.11160653084516524, -0.35735467076301575, -0.050225600600242615, -0.0010369982337579131, - -0.025365019217133522, + -0.02536501921713352, 0.01293940655887127, 0.1213097795844078, -0.10545935481786728, -0.04072507098317146, - -0.20563030242919922, - -0.24401414394378662, + -0.2056303024291992, + -0.24401414394378665, 0.09600849449634552, -0.13664796948432922, -0.04669738933444023, @@ -9021,7 +9021,7 @@ -0.1623511016368866, 0.006387416739016771, -0.1609964817762375, - -0.10735078901052475, + -0.10735078901052476, 0.02341507002711296, -0.1456366777420044, -0.2472882717847824, @@ -9066,41 +9066,41 @@ -0.0649750754237175, -0.1500633955001831, 0.09674299508333206, - 0.11028725653886795, + 0.11028725653886796, 0.030607158318161964, -0.08840877562761307, -0.19735775887966156, -0.2422848343849182, - -0.20831188559532166, + -0.20831188559532168, -0.00921051949262619, -0.02572818286716938, - 0.21367280185222626, + 0.21367280185222623, 0.09048262238502502, 0.2569093704223633, 0.018784919753670692, 0.0387856587767601, 0.1880350559949875, 0.15150636434555054, - -0.11476016789674759, + -0.1147601678967476, -0.1270519196987152, 0.053318772464990616, -0.021673204377293587, 0.0569000281393528, 0.27066895365715027, -0.08854015171527863, - -0.11383529007434845, + -0.11383529007434844, -0.06382360309362411, -0.019926894456148148, -0.1664894074201584, - -0.09651592373847961, + -0.0965159237384796, 0.01189391128718853, -0.14253482222557068, 0.01111197005957365, -0.1695888787508011, 0.04357616975903511, -0.038459137082099915, - -0.09353005141019821, - -0.18241077661514282, + -0.0935300514101982, + -0.1824107766151428, 0.20452404022216797, 0.052489493042230606, -0.2357536107301712, @@ -9110,7 +9110,7 @@ 0.1112687736749649, -0.03847147524356842, 0.021121637895703316, - -0.11864704638719559, + -0.1186470463871956, -0.04800697788596153, 0.2170422077178955, 0.08131680637598038, @@ -9127,7 
+9127,7 @@ -0.25556132197380066, 0.025113414973020554, -0.23881718516349792, - 0.09327007085084915, + 0.09327007085084917, -0.1805567741394043, -0.006314815022051334, -0.16265331208705902, @@ -9155,14 +9155,14 @@ 0.0322144441306591, -0.008752944879233837, 0.04627394303679466, - -0.18743173778057098, + -0.18743173778057096, 0.27831554412841797, 0.06585104763507843, -0.12198398262262344, -0.2511722147464752, -0.01519078016281128, 0.08676321059465408, - 0.20844121277332306, + 0.20844121277332303, 0.20499026775360107, -0.17519228160381317, 0.26006272435188293, @@ -9185,50 +9185,50 @@ 0.03849049657583237, -0.19831794500350952, -0.030260225757956505, - 0.09374415874481201, + 0.093744158744812, 0.11911945790052414, -0.0331648588180542, 0.0249052494764328, - -0.20847512781620026, + -0.20847512781620023, -0.04912380501627922, -0.09060020744800568, 0.0041393740102648735, 0.08260954171419144, 0.28917890787124634, - 0.11353360116481781, + 0.1135336011648178, 0.18905530869960785, 0.06985466182231903, -0.11127009242773056, -0.13215819001197815, - -0.09509347379207611, - -0.12497235834598541, + -0.09509347379207612, + -0.1249723583459854, -0.1052391454577446, -0.115229532122612, -0.15838804841041565, - 0.09114290028810501, + 0.091142900288105, 0.017006464302539825, - -0.15955978631973267, - 0.21374110877513885, + -0.1595597863197327, + 0.21374110877513883, 0.018943697214126587, 0.14304839074611664, 0.04406796395778656, - 0.021914372220635414, + 0.021914372220635418, 0.18676243722438812, -0.15146197378635406, 0.14446447789669037, 0.010648179799318314, - -0.11998894810676575, + -0.11998894810676576, -0.1678878217935562, - 0.24709999561309814, - 0.09188219904899597, + 0.24709999561309817, + 0.09188219904899596, 0.17794454097747803, -0.028883375227451324, -0.011555660516023636, -0.08108507096767426, -0.033134639263153076, -0.005855693947523832, - -0.23795394599437714, - -0.014032312668859959, + -0.2379539459943771, + -0.01403231266885996, -0.26128217577934265, -0.11885055899620056, 
-0.1271451860666275, @@ -9259,8 +9259,8 @@ -0.04794347286224365, 0.019008269533514977, 0.276480495929718, - -0.026792148128151894, - 0.11641503870487213, + -0.026792148128151897, + 0.11641503870487212, -0.1911795288324356, -0.07146921008825302, 0.033605434000492096, @@ -9276,17 +9276,17 @@ -0.05784895271062851, 0.0790208950638771, 0.02035927027463913, - -0.009881574660539627, + -0.009881574660539629, -0.21079392731189728, - -0.10429079085588455, + -0.10429079085588457, -0.086237333714962, - 0.17189259827136993, + 0.1718925982713699, 0.050568029284477234, - 0.16530448198318481, + 0.1653044819831848, -0.15082618594169617, -0.045381445437669754, -0.07446739077568054, - 0.011606995016336441, + 0.01160699501633644, -0.07797817885875702, 0.056026775389909744, 0.17007365822792053, @@ -9295,7 +9295,7 @@ 0.06451155990362167, 0.0030352717731148005, -0.07011640071868896, - -0.10365432500839233, + -0.10365432500839232, -0.12560243904590607, -0.17814984917640686, -0.06571955978870392, @@ -9318,24 +9318,24 @@ 0.6022669672966003, 0.15398916602134705, 0.20902477204799652, - 0.20876236259937286, + 0.2087623625993729, 0.18086235225200653, 0.07363109290599823, 0.09854523837566376, - -0.11174990981817245, + -0.11174990981817244, -0.10014349222183228, - -0.19403786957263947, + -0.1940378695726395, -0.07246334850788116, - -0.11833269894123077, - 0.42225217819213867, + -0.11833269894123076, + 0.4222521781921386, -0.16917045414447784, -0.2570873200893402, -0.02398148737847805, - -0.10556759685277939, + -0.1055675968527794, 0.09521300345659256, - -0.18234530091285706, + -0.18234530091285703, 0.07760285586118698, - -0.013780430890619755, + -0.013780430890619757, -0.07362792640924454, -0.09722840785980225, -0.13073264062404633, @@ -9346,13 +9346,13 @@ -0.0797082781791687, -0.07295921444892883, -0.07594586908817291, - 0.11988383531570435, + 0.11988383531570436, 0.03876844421029091, 0.5234305262565613, 0.13133402168750763, 0.03126957267522812, 0.38323619961738586, - -0.22509567439556122, + 
-0.2250956743955612, -0.057771943509578705, -0.0006328505114652216, -0.22255954146385193, @@ -9365,7 +9365,7 @@ -0.17817652225494385, 0.02920740842819214, -0.26384490728378296, - -0.12123551219701767, + -0.12123551219701768, 0.12327218055725098, -0.14704327285289764, 0.16698706150054932, @@ -9383,10 +9383,10 @@ -0.0715848058462143, -0.07252686470746994, -0.14103670418262482, - 0.09547247737646103, + 0.09547247737646104, 0.01067193504422903, -0.08344345539808273, - 0.18329143524169922, + 0.1832914352416992, 0.3317137658596039, 0.02372228540480137, 0.053484298288822174, @@ -9395,7 +9395,7 @@ 0.1158069521188736, 0.13480199873447418, -0.06562745571136475, - -0.12297860532999039, + -0.1229786053299904, 0.015512166544795036, 0.13030056655406952, 0.006235544569790363, @@ -9418,18 +9418,18 @@ 0.03610147163271904, 0.2428261935710907, 0.03933078050613403, - -0.11857811361551285, + -0.11857811361551283, 0.04964561387896538, - 0.19899210333824158, + 0.1989921033382416, -0.17578886449337006, -0.05021820589900017, - -0.041759517043828964, + -0.04175951704382896, 0.125326469540596, - -0.10291323810815811, + -0.10291323810815813, 0.016880793496966362, -0.0063499403186142445, 0.171964630484581, - 0.22205816209316254, + 0.22205816209316256, 0.04833061620593071, 0.015052944421768188, -0.0653747171163559, @@ -9448,7 +9448,7 @@ -0.12010125070810318, 0.06387165933847427, 0.1688791811466217, - 0.09689774364233017, + 0.09689774364233016, 0.0011660908348858356, -0.13024349510669708, 0.09852765500545502, @@ -9459,8 +9459,8 @@ -0.17841854691505432, -0.07768139988183975, 0.058484744280576706, - 0.20429138839244843, - 0.20226426422595978, + 0.20429138839244845, + 0.20226426422595975, 0.17406059801578522, 0.13223981857299805, 0.041662514209747314, @@ -9481,7 +9481,7 @@ 0.15776661038398743, 0.2700437307357788, 0.2535266876220703, - -0.11472392827272415, + -0.11472392827272417, -0.23032914102077484, 0.17084358632564545, 0.33131054043769836, @@ -9489,15 +9489,15 @@ -0.1638103425502777, 
-0.21310019493103027, -0.03135610371828079, - -0.014912066049873829, + -0.014912066049873827, -0.07116098701953888, -0.1212654784321785, 0.18740515410900116, -0.10623271018266678, - -0.11239394545555115, + -0.11239394545555116, -0.08722148090600967, 0.39329102635383606, - -0.10563691705465317, + -0.10563691705465315, -0.1142745316028595, 0.1495191901922226, 0.04044044017791748, @@ -9508,26 +9508,26 @@ 0.12312790751457214, -0.043178491294384, -0.140348881483078, - -9.998288442147896e-05, + -0.00009998288442147896, -0.3160385489463806, 0.18520790338516235, - -0.09224879741668701, + -0.092248797416687, -0.0073734428733587265, - 0.11756537109613419, + 0.1175653710961342, -0.06429873406887054, 0.35748258233070374, -0.08586011081933975, 0.02026611752808094, - 0.10499594360589981, + 0.1049959436058998, 0.04769602417945862, -0.09397713840007782, -0.26860514283180237, -0.04393130913376808, - -0.010379106737673283, + -0.010379106737673284, 0.16146495938301086, -0.04151611402630806, 0.010767264291644096, - 0.10440745204687119, + 0.1044074520468712, -0.09345664083957672, -0.08596847951412201, 0.06537594646215439, @@ -9545,7 +9545,7 @@ -0.14892913401126862, -0.10962524265050888, -0.08491133898496628, - -0.12285691499710083, + -0.12285691499710084, -0.04126023128628731, -0.017301315441727638, -0.013661161065101624, diff --git a/contrib/hamilton/contrib/user/zilto/lancedb_vdb/run.ipynb b/contrib/hamilton/contrib/user/zilto/lancedb_vdb/run.ipynb index af3aab9b3..9c8f34ddb 100644 --- a/contrib/hamilton/contrib/user/zilto/lancedb_vdb/run.ipynb +++ b/contrib/hamilton/contrib/user/zilto/lancedb_vdb/run.ipynb @@ -13,13 +13,12 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", - "from IPython.display import display\n", - "from hamilton import driver\n", - "\n", + "import __init__ as lancedb_vdb\n", "import lancedb\n", "import pyarrow as pa\n", - "import __init__ as lancedb_vdb" + "from IPython.display import display\n", + "\n", + "from hamilton import driver" 
] }, { @@ -258,11 +257,7 @@ } ], "source": [ - "dr = (\n", - " driver.Builder()\n", - " .with_modules(lancedb_vdb)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(lancedb_vdb).build()\n", "\n", "display(dr.display_all_functions(None))" ] @@ -359,14 +354,16 @@ "\n", "inputs = dict(\n", " table_name=\"demo_table\",\n", - " schema=pa.schema([\n", - " (\"source_id\", pa.int64()),\n", - " (\"source\", pa.string()),\n", - " (\"vector\", lancedb.schema.vector(4)),\n", - " ]),\n", + " schema=pa.schema(\n", + " [\n", + " (\"source_id\", pa.int64()),\n", + " (\"source\", pa.string()),\n", + " (\"vector\", lancedb.schema.vector(4)),\n", + " ]\n", + " ),\n", " data=[\n", - " {\"source_id\": 0, \"source\": \"Google Search\", \"vector\": [0., 2., 11., 22.]},\n", - " {\"source_id\": 1, \"source\": \"Bing Search\", \"vector\": [11., 23., 2., 87.]}\n", + " {\"source_id\": 0, \"source\": \"Google Search\", \"vector\": [0.0, 2.0, 11.0, 22.0]},\n", + " {\"source_id\": 1, \"source\": \"Bing Search\", \"vector\": [11.0, 23.0, 2.0, 87.0]},\n", " ],\n", ")\n", "\n", @@ -447,7 +444,7 @@ "\n", "inputs = dict(\n", " table_name=\"demo_table\",\n", - " vector_query=[2., 2., 2., 2.],\n", + " vector_query=[2.0, 2.0, 2.0, 2.0],\n", ")\n", "\n", "res = dr.execute(\n", diff --git a/contrib/hamilton/contrib/user/zilto/nixtla_mlforecast/run.ipynb b/contrib/hamilton/contrib/user/zilto/nixtla_mlforecast/run.ipynb index 440359b60..38f546e5d 100644 --- a/contrib/hamilton/contrib/user/zilto/nixtla_mlforecast/run.ipynb +++ b/contrib/hamilton/contrib/user/zilto/nixtla_mlforecast/run.ipynb @@ -15,11 +15,11 @@ "source": [ "from pprint import pprint\n", "\n", - "from IPython.display import display\n", - "from hamilton import driver\n", + "import __init__ as nixtla_mlforecast\n", "import pandas as pd\n", + "from IPython.display import display\n", "\n", - "import __init__ as nixtla_mlforecast" + "from hamilton import driver" ] }, { @@ -394,11 +394,7 @@ } ], "source": [ - "dr = (\n", - " 
driver.Builder()\n", - " .with_modules(nixtla_mlforecast)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(nixtla_mlforecast).build()\n", "\n", "# create the DAG image\n", "display(dr.display_all_functions(None))" @@ -470,11 +466,7 @@ "\n", "overrides = dict()\n", "\n", - "results = dr.execute(\n", - " final_vars=final_vars,\n", - " inputs=inputs,\n", - " overrides=overrides\n", - ")\n", + "results = dr.execute(final_vars=final_vars, inputs=inputs, overrides=overrides)\n", "\n", "pprint(list(results.keys()), width=1)" ] diff --git a/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/run.ipynb b/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/run.ipynb index ff0c2aa8e..b3da88f50 100644 --- a/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/run.ipynb +++ b/contrib/hamilton/contrib/user/zilto/nixtla_statsforecast/run.ipynb @@ -24,11 +24,11 @@ "source": [ "from pprint import pprint\n", "\n", - "from IPython.display import display\n", - "from hamilton import driver\n", + "import __init__ as nixtla_statsforecast\n", "import pandas as pd\n", + "from IPython.display import display\n", "\n", - "import __init__ as nixtla_statsforecast" + "from hamilton import driver" ] }, { @@ -330,11 +330,7 @@ } ], "source": [ - "dr = (\n", - " driver.Builder()\n", - " .with_modules(nixtla_statsforecast)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(nixtla_statsforecast).build()\n", "\n", "# create the DAG image\n", "display(dr.display_all_functions(None))" @@ -353,7 +349,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "def download_m4_dataset() -> pd.DataFrame:\n", " \"\"\"Download Nixtla's version of the M4 hourly dataset. 
ref: https://paperswithcode.com/dataset/m4\"\"\"\n", " return pd.read_parquet(\"https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet\")\n", @@ -404,11 +399,7 @@ "\n", "overrides = dict()\n", "\n", - "results = dr.execute(\n", - " final_vars=final_vars,\n", - " inputs=inputs,\n", - " overrides=overrides\n", - ")\n", + "results = dr.execute(final_vars=final_vars, inputs=inputs, overrides=overrides)\n", "\n", "pprint(list(results.keys()), width=1)" ] diff --git a/contrib/hamilton/contrib/user/zilto/webscraper/run.ipynb b/contrib/hamilton/contrib/user/zilto/webscraper/run.ipynb index d0cd3fb29..7a5e1583f 100644 --- a/contrib/hamilton/contrib/user/zilto/webscraper/run.ipynb +++ b/contrib/hamilton/contrib/user/zilto/webscraper/run.ipynb @@ -14,10 +14,11 @@ "outputs": [], "source": [ "from pprint import pprint\n", + "\n", + "import __init__ as webscraper\n", "from IPython.display import display\n", - "from hamilton import driver\n", "\n", - "import __init__ as webscraper" + "from hamilton import driver" ] }, { @@ -219,11 +220,7 @@ "\n", "overrides = dict()\n", "\n", - "res = dr.execute(\n", - " final_vars=final_vars,\n", - " inputs=inputs,\n", - " overrides=overrides\n", - ")\n", + "res = dr.execute(final_vars=final_vars, inputs=inputs, overrides=overrides)\n", "\n", "pprint(list(res.keys()), width=1)" ] diff --git a/contrib/hamilton/contrib/user/zilto/xgboost_optuna/run.ipynb b/contrib/hamilton/contrib/user/zilto/xgboost_optuna/run.ipynb index 214149913..77092fb9f 100644 --- a/contrib/hamilton/contrib/user/zilto/xgboost_optuna/run.ipynb +++ b/contrib/hamilton/contrib/user/zilto/xgboost_optuna/run.ipynb @@ -13,12 +13,13 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", "import json\n", + "from pprint import pprint\n", + "\n", + "import __init__ as xgboost_optuna\n", "from IPython.display import display\n", - "from hamilton import driver\n", "\n", - "import __init__ as xgboost_optuna" + "from hamilton import driver" ] }, { @@ -46,10 
+47,11 @@ "def read_jsonl(file_path: str) -> list:\n", " data = []\n", " with open(file_path, \"r\") as f:\n", - " for line in f.readlines():\n", + " for line in f:\n", " data.append(json.loads(line))\n", " return data\n", "\n", + "\n", "valid_configs = read_jsonl(\"valid_configs.jsonl\")\n", "pprint(valid_configs, width=1)" ] @@ -469,16 +471,9 @@ } ], "source": [ - "config = dict(\n", - " task=\"classification\"\n", - ")\n", + "config = dict(task=\"classification\")\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(xgboost_optuna)\n", - " .with_config(config)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(xgboost_optuna).with_config(config).build()\n", "\n", "display(dr.display_all_functions(None, orient=\"TB\"))" ] @@ -491,11 +486,12 @@ "source": [ "from sklearn.datasets import load_breast_cancer\n", "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import accuracy_score\n", "\n", "# Load the Boston Housing dataset (regression example)\n", "data = load_breast_cancer()\n", - "X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)" + "X_train, X_test, y_train, y_test = train_test_split(\n", + " data.data, data.target, test_size=0.2, random_state=42\n", + ")" ] }, { @@ -544,11 +540,7 @@ "\n", "overrides = dict()\n", "\n", - "res = dr.execute(\n", - " final_vars=final_vars,\n", - " inputs=inputs,\n", - " overrides=overrides\n", - ")\n", + "res = dr.execute(final_vars=final_vars, inputs=inputs, overrides=overrides)\n", "\n", "pprint(list(res.keys()), width=1)" ] diff --git a/dev_tools/language_server/tests/ls_setup.py b/dev_tools/language_server/tests/ls_setup.py index f2ea066e6..ff7a69256 100644 --- a/dev_tools/language_server/tests/ls_setup.py +++ b/dev_tools/language_server/tests/ls_setup.py @@ -38,9 +38,7 @@ def newfn(*args, **kwargs): try: return func(*args, **kwargs) except concurrent.futures._base.TimeoutError: - print( - "\n\nRetrying 
timeouted test server init " "%d of %d\n" % (attempt, RETRIES) - ) + print("\n\nRetrying timeouted test server init %d of %d\n" % (attempt, RETRIES)) attempt += 1 return func(*args, **kwargs) diff --git a/docs/concepts/ui.rst b/docs/concepts/ui.rst index 9f47a796f..adc339227 120000 --- a/docs/concepts/ui.rst +++ b/docs/concepts/ui.rst @@ -1 +1 @@ -../hamilton-ui/ui.rst \ No newline at end of file +../hamilton-ui/ui.rst diff --git a/docs/how-tos/cli-reference.md b/docs/how-tos/cli-reference.md index 1165e8959..f596b7760 100644 --- a/docs/how-tos/cli-reference.md +++ b/docs/how-tos/cli-reference.md @@ -9,6 +9,8 @@ The dependencies for the Apache Hamilton CLI can be installed via pip install sf-hamilton[cli] ``` +The CLI includes support for TOML files via the `tomli` library. When using TOML configuration files, the extra dependencies will be automatically available. + You can verify the installation with ```console @@ -28,8 +30,9 @@ hamilton --help **Commands**: * `build`: Build a single Driver with MODULES -* `diff`: Diff between the current MODULES and their... -* `version`: Version NODES and DATAFLOW from dataflow... +* `diff`: Diff between the current MODULES and their specified GIT_REFERENCE +* `validate`: Validate DATAFLOW execution for the given CONTEXT +* `version`: Version NODES and DATAFLOW from dataflow with MODULES * `view`: Build and visualize dataflow with MODULES ## `hamilton build` @@ -48,6 +51,10 @@ $ hamilton build [OPTIONS] MODULES... **Options**: +* `--name TEXT`: Name of the dataflow. Default: Derived from MODULES. +* `--context FILE`: Path to Driver context file [.json, .py, .toml]. For TOML files, Hamilton looks for either: + - Top-level Hamilton headers: `HAMILTON_CONFIG`, `HAMILTON_FINAL_VARS`, `HAMILTON_INPUTS`, `HAMILTON_OVERRIDES` + - Tool-specific section: `[tool.hamilton]` with `config`, `final_vars`, `inputs`, `overrides` sub-keys * `--help`: Show this message and exit. 
## `hamilton diff` @@ -66,11 +73,78 @@ $ hamilton diff [OPTIONS] MODULES... **Options**: +* `--name TEXT`: Name of the dataflow. Default: Derived from MODULES. +* `--context FILE`: Path to Driver context file [.json, .py, .toml]. For TOML files, Hamilton looks for either: + - Top-level Hamilton headers: `HAMILTON_CONFIG`, `HAMILTON_FINAL_VARS`, `HAMILTON_INPUTS`, `HAMILTON_OVERRIDES` + - Tool-specific section: `[tool.hamilton]` with `config`, `final_vars`, `inputs`, `overrides` sub-keys +* `--output-file-path PATH`: Output path of visualization. If path is a directory, use NAME for file name. [default: .] * `--git-reference TEXT`: [default: HEAD] * `--view / --no-view`: [default: no-view] -* `--output-file-path PATH`: [default: diff.png] * `--help`: Show this message and exit. +## `hamilton validate` + +Validate DATAFLOW execution for the given CONTEXT + +**Usage**: + +```console +$ hamilton validate [OPTIONS] MODULES... +``` + +**Arguments**: + +* `MODULES...`: [required] + +**Options**: + +* `--context FILE`: [required] Path to Driver context file [.json, .py, .toml]. For TOML files, Hamilton looks for either: + - Top-level Hamilton headers: `HAMILTON_CONFIG`, `HAMILTON_FINAL_VARS`, `HAMILTON_INPUTS`, `HAMILTON_OVERRIDES` + - Tool-specific section: `[tool.hamilton]` with `config`, `final_vars`, `inputs`, `overrides` sub-keys +* `--name TEXT`: Name of the dataflow. Default: Derived from MODULES. +* `--help`: Show this message and exit. + +## Using TOML Files for Configuration + +Starting with version 1.90.0, the Hamilton CLI supports loading configuration from TOML files, including `pyproject.toml`. 
You can use either of these two formats: + +### Format 1: Top-level Hamilton headers + +In your TOML file, define the Hamilton configuration headers at the top level: + +```toml +# example_context.toml +HAMILTON_CONFIG = {param1 = "value1", param2 = 42} +HAMILTON_FINAL_VARS = ["final_result", "output_value"] +HAMILTON_INPUTS = {input_value = 100, string_input = "example"} +HAMILTON_OVERRIDES = {override_param = "override_value"} +``` + +### Format 2: Tool-specific section (recommended for pyproject.toml) + +For projects using `pyproject.toml`, it's recommended to place Hamilton configuration in the `[tool.hamilton]` section: + +```toml +# pyproject.toml +[tool.hamilton] +config = {param1 = "value1", param2 = 42} +final_vars = ["final_result", "output_value"] +inputs = {input_value = 100, string_input = "example"} +overrides = {override_param = "override_value"} +``` + +### Usage + +You can use TOML configuration files with all Hamilton CLI commands that support the `--context` option: + +```console +hamilton build --context config.toml my_module.py +hamilton validate --context config.toml my_module.py +hamilton view --context config.toml my_module.py +hamilton diff --context config.toml my_module.py +hamilton version --context config.toml my_module.py +``` + ## `hamilton version` Version NODES and DATAFLOW from dataflow with MODULES @@ -87,6 +161,10 @@ $ hamilton version [OPTIONS] MODULES... **Options**: +* `--name TEXT`: Name of the dataflow. Default: Derived from MODULES. +* `--context FILE`: Path to Driver context file [.json, .py, .toml]. For TOML files, Hamilton looks for either: + - Top-level Hamilton headers: `HAMILTON_CONFIG`, `HAMILTON_FINAL_VARS`, `HAMILTON_INPUTS`, `HAMILTON_OVERRIDES` + - Tool-specific section: `[tool.hamilton]` with `config`, `final_vars`, `inputs`, `overrides` sub-keys * `--help`: Show this message and exit. ## `hamilton view` @@ -105,5 +183,9 @@ $ hamilton view [OPTIONS] MODULES... 
**Options**: -* `--output-file-path PATH`: [default: ./dag.png] +* `--name TEXT`: Name of the dataflow. Default: Derived from MODULES. +* `--context FILE`: Path to Driver context file [.json, .py, .toml]. For TOML files, Hamilton looks for either: + - Top-level Hamilton headers: `HAMILTON_CONFIG`, `HAMILTON_FINAL_VARS`, `HAMILTON_INPUTS`, `HAMILTON_OVERRIDES` + - Tool-specific section: `[tool.hamilton]` with `config`, `final_vars`, `inputs`, `overrides` sub-keys +* `--output-file-path PATH`: Output path of visualization. If path is a directory, use NAME for file name. [default: .] * `--help`: Show this message and exit. diff --git a/docs/how-tos/pre-commit-hooks.md b/docs/how-tos/pre-commit-hooks.md index 803fcc4e3..a0295f259 100644 --- a/docs/how-tos/pre-commit-hooks.md +++ b/docs/how-tos/pre-commit-hooks.md @@ -106,7 +106,14 @@ In `v1`, the dataflow could be validated for `C` without any inputs. Now, a deve // will call .validate_execution(final_vars["C"]) ``` -```{note} +Or, using a TOML file: + +```toml +# context.toml +HAMILTON_FINAL_VARS = ["C"] # will call .validate_execution(final_vars["C"]) +``` + +``` {note} pre-commit hooks can prevent commits from breaking a core path, but you should use unit and integration tests for more robust checks. 
``` @@ -125,6 +132,7 @@ To use them, add this snippet to your `.pre-commit-config.yaml` and adapt it to hamilton build my_module.py, hamilton build my_module2.py, hamilton validate --context context.json my_module.py my_module2.py, + hamilton validate --context context.toml my_module.py my_module2.py, # example with TOML file ] ``` diff --git a/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb b/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb index 875b13ed6..b4fd55428 100644 --- a/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb +++ b/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb @@ -16,7 +16,8 @@ "id": "4682d46e", "metadata": {}, "source": [ - "# Ingestion Notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb)\n", + "# Ingestion Notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/ingest_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\GraphRAG\\ingest_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\GraphRAG\\ingest_notebook.ipynb)\n", + "\n", "\n", "\n", "In this notebook we see how to load data into FalkorDB using 
[Hamilton](https://github.com/apache/hamilton)." @@ -271,11 +272,12 @@ "outputs": [], "source": [ "import falkordb\n", + "\n", "from hamilton import driver\n", "from hamilton.execution import executors\n", "\n", - "db = falkordb.FalkorDB(host='localhost', port=6379)\n", - "g = db.select_graph(\"UFC\")\n", + "db = falkordb.FalkorDB(host=\"localhost\", port=6379)\n", + "g = db.select_graph(\"UFC\")\n", "\n", "# Clear previous graph\n", "if \"UFC\" in db.list_graphs():\n", diff --git a/examples/LLM_Workflows/GraphRAG/notebook.ipynb b/examples/LLM_Workflows/GraphRAG/notebook.ipynb index f8ec7ac91..2b7f920cb 100644 --- a/examples/LLM_Workflows/GraphRAG/notebook.ipynb +++ b/examples/LLM_Workflows/GraphRAG/notebook.ipynb @@ -16,7 +16,8 @@ "id": "cbd976a6", "metadata": {}, "source": [ - "# Question & answer notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/notebook.ipynb)\n", + "# Question & answer notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/GraphRAG/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\GraphRAG\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\GraphRAG\\notebook.ipynb)\n", + 
"\n", "\n", "\n", "This notebook walks you through how to build a burr application that talks to falkorDB and openai to answer questions about UFC fights." @@ -36,16 +37,16 @@ "source": [ "# import what we need\n", "import json\n", + "import uuid\n", "from typing import Tuple\n", "\n", + "import falkordb\n", "import openai\n", - "from burr.core import ApplicationBuilder, State, default, expr, Application\n", + "from burr.core import ApplicationBuilder, State, default, expr\n", "from burr.core.action import action\n", "from burr.tracking import LocalTrackingClient\n", - "import uuid\n", "from falkordb import FalkorDB\n", - "from graph_schema import graph_schema\n", - "import falkordb" + "from graph_schema import graph_schema" ] }, { @@ -73,49 +74,54 @@ " \"\"\"Prompt to help tell the LLM what is in the graph DB\"\"\"\n", " prompt = \"The Knowledge graph contains nodes of the following types:\\n\"\n", "\n", - " for node in schema['nodes']:\n", + " for node in schema[\"nodes\"]:\n", " lbl = node\n", - " node = schema['nodes'][node]\n", - " if len(node['attributes']) > 0:\n", + " node = schema[\"nodes\"][node]\n", + " if len(node[\"attributes\"]) > 0:\n", " prompt += f\"The {lbl} node type has the following set of attributes:\\n\"\n", - " for attr in node['attributes']:\n", - " t = node['attributes'][attr]['type']\n", + " for attr in node[\"attributes\"]:\n", + " t = node[\"attributes\"][attr][\"type\"]\n", " prompt += f\"The {attr} attribute is of type {t}\\n\"\n", " else:\n", " prompt += f\"The {node} node type has no attributes:\\n\"\n", "\n", " prompt += \"In addition the Knowledge graph contains edge of the following types:\\n\"\n", "\n", - " for edge in schema['edges']:\n", + " for edge in schema[\"edges\"]:\n", " rel = edge\n", - " edge = schema['edges'][edge]\n", - " if len(edge['attributes']) > 0:\n", + " edge = schema[\"edges\"][edge]\n", + " if len(edge[\"attributes\"]) > 0:\n", " prompt += f\"The {rel} edge type has the following set of attributes:\\n\"\n", 
- " for attr in edge['attributes']:\n", - " t = edge['attributes'][attr]['type']\n", + " for attr in edge[\"attributes\"]:\n", + " t = edge[\"attributes\"][attr][\"type\"]\n", " prompt += f\"The {attr} attribute is of type {t}\\n\"\n", " else:\n", " prompt += f\"The {rel} edge type has no attributes:\\n\"\n", "\n", " prompt += f\"The {rel} edge connects the following entities:\\n\"\n", - " for conn in edge['connects']:\n", + " for conn in edge[\"connects\"]:\n", " src = conn[0]\n", " dest = conn[1]\n", " prompt += f\"{src} is connected via {rel} to {dest}, (:{src})-[:{rel}]->(:{dest})\\n\"\n", "\n", " return prompt\n", "\n", + "\n", "def set_inital_chat_history(schema_prompt: str) -> list[dict]:\n", " \"\"\"Helper to set initial system message\"\"\"\n", " SYSTEM_MESSAGE = \"You are a Cypher expert with access to a directed knowledge graph\\n\"\n", " SYSTEM_MESSAGE += schema_prompt\n", - " SYSTEM_MESSAGE += (\"Query the knowledge graph to extract relevant information to help you anwser the users \"\n", - " \"questions, base your answer only on the context retrieved from the knowledge graph, \"\n", - " \"do not use preexisting knowledge.\")\n", - " SYSTEM_MESSAGE += (\"For example to find out if two fighters had fought each other e.g. did Conor McGregor \"\n", - " \"every compete against Jose Aldo issue the following query: \"\n", - " \"MATCH (a:Fighter)-[]->(f:Fight)<-[]-(b:Fighter) WHERE a.Name = 'Conor McGregor' AND \"\n", - " \"b.Name = 'Jose Aldo' RETURN a, b\\n\")\n", + " SYSTEM_MESSAGE += (\n", + " \"Query the knowledge graph to extract relevant information to help you anwser the users \"\n", + " \"questions, base your answer only on the context retrieved from the knowledge graph, \"\n", + " \"do not use preexisting knowledge.\"\n", + " )\n", + " SYSTEM_MESSAGE += (\n", + " \"For example to find out if two fighters had fought each other e.g. 
did Conor McGregor \"\n", + " \"every compete against Jose Aldo issue the following query: \"\n", + " \"MATCH (a:Fighter)-[]->(f:Fight)<-[]-(b:Fighter) WHERE a.Name = 'Conor McGregor' AND \"\n", + " \"b.Name = 'Jose Aldo' RETURN a, b\\n\"\n", + " )\n", "\n", " messages = [{\"role\": \"system\", \"content\": SYSTEM_MESSAGE}]\n", " return messages" @@ -152,10 +158,12 @@ " if len(results) == 0:\n", " results = {\n", " \"error\": \"The query did not return any data, please make sure you're using the right edge \"\n", - " \"directions and you're following the correct graph schema\"}\n", + " \"directions and you're following the correct graph schema\"\n", + " }\n", "\n", " return str(results)\n", "\n", + "\n", "# description\n", "run_cypher_query_tool_description = {\n", " \"type\": \"function\",\n", @@ -247,7 +255,7 @@ " if tool_calls:\n", " new_state = new_state.update(tool_calls=tool_calls)\n", " # if there are no tool calls -- it means we didn't know what to do\n", - " return {\"ai_response\": response_message.content, \"usage\": response.usage.to_dict()}, new_state\n" + " return {\"ai_response\": response_message.content, \"usage\": response.usage.to_dict()}, new_state" ] }, { @@ -273,11 +281,11 @@ " result = {\"tool_calls\": []}\n", " for tool_call in tool_calls:\n", " function_name = tool_call.function.name\n", - " assert (function_name == \"run_cypher_query\")\n", + " assert function_name == \"run_cypher_query\"\n", " function_args = json.loads(tool_call.function.arguments)\n", " function_response = run_cypher_query(graph, function_args.get(\"query\"))\n", - " new_state = new_state.append(chat_history=\n", - " {\n", + " new_state = new_state.append(\n", + " chat_history={\n", " \"tool_call_id\": tool_call.id,\n", " \"role\": \"tool\",\n", " \"name\": function_name,\n", @@ -314,8 +322,7 @@ " ) # get a new response from the model where it can see the function response\n", " response_message = response.choices[0].message\n", " new_state = 
state.append(chat_history=response_message.to_dict())\n", - " return {\"ai_response\": response_message.content,\n", - " \"usage\": response.usage.to_dict()}, new_state\n" + " return {\"ai_response\": response_message.content, \"usage\": response.usage.to_dict()}, new_state" ] }, { @@ -341,7 +348,7 @@ "source": [ "# define our clients / connections / IDs\n", "openai_client = openai.OpenAI()\n", - "db_client = FalkorDB(host='localhost', port=6379)\n", + "db_client = FalkorDB(host=\"localhost\", port=6379)\n", "graph_name = \"UFC\"\n", "application_run_id = str(uuid.uuid4())" ] @@ -388,18 +395,18 @@ " AI_create_cypher_query.bind(client=openai_client),\n", " tool_call.bind(graph=graph),\n", " AI_generate_response.bind(client=openai_client),\n", - " human_converse\n", + " human_converse,\n", " )\n", " .with_transitions( # define the edges between the actions based on state conditions\n", " (\"human_converse\", \"AI_create_cypher_query\", default),\n", " (\"AI_create_cypher_query\", \"tool_call\", expr(\"len(tool_calls)>0\")),\n", " (\"AI_create_cypher_query\", \"human_converse\", default),\n", " (\"tool_call\", \"AI_generate_response\", default),\n", - " (\"AI_generate_response\", \"human_converse\", default)\n", + " (\"AI_generate_response\", \"human_converse\", default),\n", " )\n", " .with_identifiers(app_id=application_run_id)\n", " .with_state( # initial state\n", - " **{\"chat_history\": base_messages, \"tool_calls\": []},\n", + " **{\"chat_history\": base_messages, \"tool_calls\": []},\n", " )\n", " .with_entrypoint(\"human_converse\")\n", " .with_tracker(tracker)\n", diff --git a/examples/LLM_Workflows/NER_Example/notebook.ipynb b/examples/LLM_Workflows/NER_Example/notebook.ipynb index cd020984f..ff2c41eba 100644 --- a/examples/LLM_Workflows/NER_Example/notebook.ipynb +++ b/examples/LLM_Workflows/NER_Example/notebook.ipynb @@ -21,7 +21,8 @@ } }, "source": [ - "# How to use Lancedb with NER semantic search \\[for RAG\\] [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/NER_Example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/NER_Example/notebook.ipynb)\n", + "# How to use Lancedb with NER semantic search \\[for RAG\\] [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/NER_Example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/NER_Example/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\NER_Example\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\NER_Example\\notebook.ipynb)\n", + "\n", "\n", "In this post we’ll walk through an example pipeline written in Hamilton to embed some text, and also capture extra metadata about the text that can be used when deciding what data to pull for RAG. 
This is a form of \"semantic search\" and we use LanceDB to store our data and query over it.\n", "\n", @@ -1411,6 +1412,7 @@ ], "source": [ "from hamilton import driver, lifecycle\n", + "\n", "dr = (\n", " driver.Builder()\n", " .with_config({})\n", @@ -1544,8 +1546,9 @@ "source": [ "# now we execute it - we specify the output that we want.\n", "import lancedb\n", + "\n", "table_name = \"medium_docs\"\n", - "db_client = lancedb.connect(\"./.lancedb\")\n", + "db_client = lancedb.connect(\"./.lancedb\")\n", "\n", "results = dr.execute(\n", " [\"load_into_lancedb\"],\n", @@ -2189,11 +2192,14 @@ " .with_adapters(lifecycle.PrintLn())\n", " .build()\n", ")\n", - "dr_query.execute([\"lancedb_result\"], \n", - " inputs={\"table_name\": table_name, \n", - " \"query\": \"What is the future of autonomous vehicles?\",\n", - " \"db_client\": db_client\n", - " })" + "dr_query.execute(\n", + " [\"lancedb_result\"],\n", + " inputs={\n", + " \"table_name\": table_name,\n", + " \"query\": \"What is the future of autonomous vehicles?\",\n", + " \"db_client\": db_client,\n", + " },\n", + ")" ] }, { @@ -2265,12 +2271,8 @@ ], "source": [ "dr_query.execute(\n", - " [\"lancedb_result\"], \n", - " inputs={\n", - " \"table_name\": table_name, \n", - " \"query\": \"Who is Joe Biden?\",\n", - " \"db_client\": db_client\n", - " }\n", + " [\"lancedb_result\"],\n", + " inputs={\"table_name\": table_name, \"query\": \"Who is Joe Biden?\", \"db_client\": db_client},\n", ")" ] }, @@ -2339,12 +2341,12 @@ ], "source": [ "dr_query.execute(\n", - " [\"lancedb_result\"], \n", + " [\"lancedb_result\"],\n", " inputs={\n", - " \"table_name\": table_name, \n", + " \"table_name\": table_name,\n", " \"query\": \"How Data is changing the world?\",\n", - " \"db_client\": db_client\n", - " }\n", + " \"db_client\": db_client,\n", + " },\n", ")" ] }, @@ -2403,38 +2405,27 @@ } ], "source": [ - "from hamilton_sdk import adapters\n", - "from hamilton import driver\n", "import uuid\n", "\n", "import lancedb\n", + 
"\n", + "from hamilton import driver\n", + "from hamilton_sdk import adapters\n", + "\n", "table_name = \"medium_docs\"\n", - "db_client = lancedb.connect(\"./.lancedb\")\n", + "db_client = lancedb.connect(\"./.lancedb\")\n", "RUN_ID = str(uuid.uuid4())\n", "\n", "tracker = adapters.HamiltonTracker(\n", - " project_id=41, # <--- modify this \n", - " username=\"elijah@dagworks.io\", # <--- modify this \n", + " project_id=41, # <--- modify this\n", + " username=\"elijah@dagworks.io\", # <--- modify this\n", " dag_name=\"ner-lancedb-pipeline\",\n", - " tags={\"context\": \"querying\",\n", - " \"team\": \"MY_TEAM\",\n", - " \"run_id\": RUN_ID,\n", - " \"version\": \"1\"},\n", - ")\n", - "dr_query = (\n", - " driver.Builder()\n", - " .with_config({})\n", - " .with_modules(ner_module)\n", - " .with_adapters(tracker)\n", - " .build()\n", + " tags={\"context\": \"querying\", \"team\": \"MY_TEAM\", \"run_id\": RUN_ID, \"version\": \"1\"},\n", ")\n", + "dr_query = driver.Builder().with_config({}).with_modules(ner_module).with_adapters(tracker).build()\n", "dr_query.execute(\n", - " [\"lancedb_result\"], \n", - " inputs={\n", - " \"table_name\": table_name, \n", - " \"query\": \"Who is Joe Biden?\",\n", - " \"db_client\": db_client\n", - " }\n", + " [\"lancedb_result\"],\n", + " inputs={\"table_name\": table_name, \"query\": \"Who is Joe Biden?\", \"db_client\": db_client},\n", ")" ] }, diff --git a/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb b/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb index d94b93f34..17762b20c 100644 --- a/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb +++ b/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb @@ -16,7 +16,8 @@ "id": "e0633cdc", "metadata": {}, "source": [ - "# Simple Single Document Processing Pipeline Example [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb)\n", + "# Simple Single Document Processing Pipeline Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/RAG_document_extract_chunk_embed/simple_pipeline.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\RAG_document_extract_chunk_embed\\simple_pipeline.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\RAG_document_extract_chunk_embed\\simple_pipeline.ipynb)\n", + "\n", "\n", "\n", "This notebook shows an explains a simple document processing pipeline to feed into a RAG use case. \n", @@ -84,6 +85,7 @@ "source": [ "# set OPENAI_API_KEY if you haven't\n", "import os\n", + "\n", "os.environ[\"OPENAI_API_KEY\"] = \"MY_KEY\"" ] }, @@ -556,7 +558,9 @@ ], "source": [ "# example what the HTML chunker is doing:\n", - "doc_pipeline.html_chunker().split_text(\"

title

some text

some more text

subsection1

section text
more text

\")" + "doc_pipeline.html_chunker().split_text(\n", + " \"

title

some text

some more text

subsection1

section text
more text

\"\n", + ")" ] }, { @@ -1821,7 +1825,9 @@ ], "source": [ "# example\n", - "doc_pipeline.client().embeddings.create(input=\"this is some text that will change into a vector\", model=\"text-embedding-3-small\").data[0].embedding" + "doc_pipeline.client().embeddings.create(\n", + " input=\"this is some text that will change into a vector\", model=\"text-embedding-3-small\"\n", + ").data[0].embedding" ] }, { @@ -2381,12 +2387,13 @@ ], "source": [ "from hamilton import driver\n", + "\n", "# Note: we don't import doc_pipeline because it was being created via the magic & cells above.\n", "\n", "# create the driver\n", "pipeline_driver = driver.Builder().with_modules(doc_pipeline).build()\n", "\n", - "# execute the pipeline for the given URL \n", + "# execute the pipeline for the given URL\n", "results = pipeline_driver.execute([\"store\"], inputs={\"url\": \"https://hamilton.apache.org/\"})\n", "\n", "# show the dataframe for this document\n", @@ -3031,7 +3038,10 @@ "source": [ "import pandas as pd\n", "\n", - "urls = [\"https://hamilton.apache.org/\", \"https://hamilton.apache.org/concepts/ui\"] # some list of URLs\n", + "urls = [\n", + " \"https://hamilton.apache.org/\",\n", + " \"https://hamilton.apache.org/concepts/ui\",\n", + "] # some list of URLs\n", "dataframes = []\n", "for url in urls:\n", " results = pipeline_driver.execute([\"store\"], inputs={\"url\": url})\n", @@ -3293,8 +3303,8 @@ ], "source": [ "# now we create our parallel driver with both modules\n", - "from hamilton.execution import executors\n", "from hamilton import driver\n", + "from hamilton.execution import executors\n", "\n", "parallel_dr = (\n", " driver.Builder()\n", @@ -3917,8 +3927,11 @@ ], "source": [ "# let's execute it\n", - "urls = [\"https://hamilton.apache.org/\", \"https://hamilton.apache.org/concepts/ui\"] # some list of URLs\n", - "result = parallel_dr.execute([\"collected_dataframes\"], inputs={\"urls\":urls})\n", + "urls = [\n", + " \"https://hamilton.apache.org/\",\n", + " 
\"https://hamilton.apache.org/concepts/ui\",\n", + "] # some list of URLs\n", + "result = parallel_dr.execute([\"collected_dataframes\"], inputs={\"urls\": urls})\n", "result[\"collected_dataframes\"]" ] }, diff --git a/examples/LLM_Workflows/image_telephone/notebook.ipynb b/examples/LLM_Workflows/image_telephone/notebook.ipynb index 9c7a0b28b..43e4feacc 100644 --- a/examples/LLM_Workflows/image_telephone/notebook.ipynb +++ b/examples/LLM_Workflows/image_telephone/notebook.ipynb @@ -21,7 +21,8 @@ } }, "source": [ - "# Simple Telephone Application [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/image_telephone/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/image_telephone/notebook.ipynb)\n", + "# Simple Telephone Application [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/image_telephone/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/image_telephone/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\image_telephone\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\image_telephone\\notebook.ipynb)\n", + "\n", "\n", "This notebook demonstrates how to create a simple telephone application with Burr.\n", "\n", @@ -71,10 +72,11 @@ "source": [ "from typing import Tuple\n", "\n", - "from hamilton import dataflows, driver\n", - 
"from PIL import Image\n", "from burr.core import ApplicationBuilder, Result, State, default, expr\n", - "from burr.core.action import action" + "from burr.core.action import action\n", + "from PIL import Image\n", + "\n", + "from hamilton import dataflows, driver" ] }, { @@ -164,6 +166,7 @@ " }\n", " return result, state.update(**updates).append(image_location_history=current_image)\n", "\n", + "\n", "@action(\n", " reads=[\"current_image_caption\"],\n", " writes=[\"current_image_location\", \"image_caption_history\"],\n", @@ -213,15 +216,11 @@ "source": [ "# create the application\n", "# instantiate hamilton drivers and then bind them to the actions.\n", - "caption_image_driver = (\n", - " driver.Builder().with_config({}).with_modules(caption_images).build()\n", - ")\n", - "generate_image_driver = (\n", - " driver.Builder().with_config({}).with_modules(generate_images).build()\n", - ")\n", + "caption_image_driver = driver.Builder().with_config({}).with_modules(caption_images).build()\n", + "generate_image_driver = driver.Builder().with_config({}).with_modules(generate_images).build()\n", "\n", - "def build_app(starting_image: str = \"statemachine.png\",\n", - " number_of_images_to_caption: int = 2):\n", + "\n", + "def build_app(starting_image: str = \"statemachine.png\", number_of_images_to_caption: int = 2):\n", " _app = (\n", " ApplicationBuilder()\n", " .with_state(\n", @@ -236,7 +235,11 @@ " terminal=Result(\"image_location_history\", \"image_caption_history\"),\n", " )\n", " .with_transitions(\n", - " (\"caption\", \"terminal\", expr(f\"len(image_caption_history) == {number_of_images_to_caption}\")),\n", + " (\n", + " \"caption\",\n", + " \"terminal\",\n", + " expr(f\"len(image_caption_history) == {number_of_images_to_caption}\"),\n", + " ),\n", " (\"caption\", \"image\", default),\n", " (\"image\", \"caption\", default),\n", " )\n", @@ -246,6 +249,7 @@ " )\n", " return _app\n", "\n", + "\n", "# app = build_app(\"path/to/my/image.png\")\n", "app = 
build_app()" ] @@ -368,8 +372,11 @@ }, "outputs": [], "source": [ - "import requests\n", "import io\n", + "\n", + "import requests\n", + "\n", + "\n", "def display_output(image_location_history: list, image_caption_history: list):\n", " \"\"\"This function displays the images and captions.\"\"\"\n", " for image, caption in zip(image_location_history, image_caption_history):\n", diff --git a/examples/LLM_Workflows/langchain_comparison/hamilton_anthropic.py b/examples/LLM_Workflows/langchain_comparison/hamilton_anthropic.py index 984adbb0c..e21042905 100644 --- a/examples/LLM_Workflows/langchain_comparison/hamilton_anthropic.py +++ b/examples/LLM_Workflows/langchain_comparison/hamilton_anthropic.py @@ -16,7 +16,7 @@ def llm_client__anthropic() -> anthropic.Anthropic: def joke_prompt(topic: str) -> str: - return ("Human:\n\n" "Tell me a short joke about {topic}\n\n" "Assistant:").format(topic=topic) + return ("Human:\n\nTell me a short joke about {topic}\n\nAssistant:").format(topic=topic) @config.when(provider="openai") diff --git a/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb b/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb index 297b383aa..5b34aa0c4 100644 --- a/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb +++ b/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Monitor Hamilton with OpenTelemetry, OpenLLMetry and Traceloop [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb)\n", + "# Monitor Hamilton with OpenTelemetry, OpenLLMetry and Traceloop [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/observability_openllmetry/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\observability_openllmetry\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\observability_openllmetry\\notebook.ipynb)\n", + "\n", "\n", "\n", "\n", @@ -28,9 +29,11 @@ "outputs": [], "source": [ "import os\n", + "\n", + "from traceloop.sdk import Traceloop\n", + "\n", "from hamilton import driver\n", "from hamilton.plugins import h_opentelemetry\n", - "from traceloop.sdk import Traceloop\n", "\n", "%load_ext hamilton.plugins.jupyter_magic" ] diff --git a/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb b/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb index 9893feb47..ae16d7d9c 100644 --- a/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb +++ b/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb @@ -19,7 +19,8 @@ } }, "source": [ - "# Notebook showing how to run PDF summarizer on Spark [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb)\n", + "# Notebook showing how to run PDF summarizer 
on Spark [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/pdf_summarizer/run_on_spark/run.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\pdf_summarizer\\run_on_spark\\run.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\pdf_summarizer\\run_on_spark\\run.ipynb)\n", + "\n", "\n", "In this notebook we'll walk through what's in `run.py`, which shows how one\n", "can setup a spark job to run the PDF summarizer dataflow defined in `summarization.py`.\n", @@ -47,6 +48,7 @@ "source": [ "# imports\n", "import os\n", + "\n", "import pandas as pd\n", "import summarization\n", "from pyspark.sql import SparkSession\n", @@ -75,11 +77,12 @@ "log_setup.setup_logging(log_level=log_setup.LOG_LEVELS[\"INFO\"])\n", "# create the SparkSession -- note in real life, you'd adjust the number of executors to control parallelism.\n", "spark = SparkSession.builder.config(\n", - " \"spark.executorEnv.OPENAI_API_KEY\", openai_api_key\n", - "#).config( # you might need to following in case things don't work for you.\n", - "# \"spark.sql.warehouse.dir\", \"~/temp/dwh\"\n", - "#).master(\n", - "# \"local[1]\" # Change this in real life.\n", + " \"spark.executorEnv.OPENAI_API_KEY\",\n", + " openai_api_key,\n", + " # ).config( # you might need to following in case things don't work for you.\n", + " # \"spark.sql.warehouse.dir\", \"~/temp/dwh\"\n", + " # ).master(\n", + " # \"local[1]\" # Change this in real life.\n", ").getOrCreate()\n", 
"spark.sparkContext.setLogLevel(\"info\")" ] @@ -158,7 +161,7 @@ "driver_config = dict(file_type=\"pdf\")\n", "# create the Hamilton driver\n", "adapter = h_spark.PySparkUDFGraphAdapter()\n", - "dr = driver.Driver(driver_config, *modules, adapter=adapter) # can pass in multiple modules\n" + "dr = driver.Driver(driver_config, *modules, adapter=adapter) # can pass in multiple modules" ] }, { @@ -185,7 +188,7 @@ " content_type=content_type,\n", " user_query=user_query,\n", " )\n", - ")\n" + ")" ] }, { @@ -414,9 +417,7 @@ ], "source": [ "# visualize execution of what is going to be appended\n", - "dr.visualize_execution(\n", - " cols_to_append, None, None, inputs=execute_inputs\n", - ")" + "dr.visualize_execution(cols_to_append, None, None, inputs=execute_inputs)" ] }, { diff --git a/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb b/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb index 4b8454e33..9fab16f0f 100644 --- a/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb +++ b/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb @@ -21,7 +21,8 @@ } }, "source": [ - "## A basic notebook to run the pipeline defined in `doc_pipeline.py`. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb)\n", + "## A basic notebook to run the pipeline defined in `doc_pipeline.py`. 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\scraping_and_chunking\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\scraping_and_chunking\\notebook.ipynb)\n", + "\n", "\n", "\n", "By default this runs parts of the pipeline in parallel using threads or processes.\n", @@ -54410,9 +54411,7 @@ " # Choose a backend to process the parallel parts of the pipeline\n", " # .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=5))\n", " # .with_remote_executor(executors.MultiProcessingExecutor(max_tasks=5))\n", - " .with_remote_executor(\n", - " h_ray.RayTaskExecutor()\n", - " ) # be sure to run ray.init() or pass in config.\n", + " .with_remote_executor(h_ray.RayTaskExecutor()) # be sure to run ray.init() or pass in config.\n", " .build()\n", ")\n", "dag = dr.display_all_functions()\n", diff --git a/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb b/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb index 2b52742d5..2b17c68de 100644 --- a/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb +++ b/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb @@ -18,7 +18,7 @@ "collapsed": false }, "source": [ - "# Shows how to run the spark pipeline. 
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb)\n" + "# Shows how to run the spark pipeline. [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/LLM_Workflows/scraping_and_chunking/spark/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\LLM_Workflows\\scraping_and_chunking\\spark\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\LLM_Workflows\\scraping_and_chunking\\spark\\notebook.ipynb)\n" ] }, { @@ -30,17 +30,12 @@ }, "outputs": [], "source": [ - "from hamilton import driver\n", - "\n", "import doc_pipeline\n", "import spark_pipeline\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(doc_pipeline, spark_pipeline)\n", - " .with_config({})\n", - " .build()\n", - ")\n", + "from hamilton import driver\n", + "\n", + "dr = driver.Builder().with_modules(doc_pipeline, spark_pipeline).with_config({}).build()\n", "dag = dr.visualize_execution(\n", " [\"chunked_url_text\"],\n", " inputs={\"app_name\": \"chunking_spark_job\", \"num_partitions\": 4},\n", diff --git a/examples/aws/sagemaker/notebook.ipynb b/examples/aws/sagemaker/notebook.ipynb index a4692b523..b69947b51 
100644 --- a/examples/aws/sagemaker/notebook.ipynb +++ b/examples/aws/sagemaker/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Sagemaker tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/aws/sagemaker/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/aws/sagemaker/notebook.ipynb)\n" + "# Sagemaker tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/aws/sagemaker/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/aws/sagemaker/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\aws\\sagemaker\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\aws\\sagemaker\\notebook.ipynb)\n" ] }, { @@ -23,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "from sagemaker.processing import ScriptProcessor, ProcessingInput, ProcessingOutput" + "from sagemaker.processing import ProcessingInput, ProcessingOutput, ScriptProcessor" ] }, { @@ -33,11 +33,11 @@ "outputs": [], "source": [ "script_processor = ScriptProcessor(\n", - " command=['python3'],\n", - " image_uri='.dkr.ecr..amazonaws.com/aws-sagemaker-hamilton:latest', # Change to the actual URI\n", - " role='arn:aws:iam:::role/SageMakerScriptProcessorRole', # Change to the actual URI\n", + " command=[\"python3\"],\n", + " image_uri=\".dkr.ecr..amazonaws.com/aws-sagemaker-hamilton:latest\", # Change to the actual URI\n", 
+ " role=\"arn:aws:iam:::role/SageMakerScriptProcessorRole\", # Change to the actual URI\n", " instance_count=1,\n", - " instance_type='ml.t3.medium'\n", + " instance_type=\"ml.t3.medium\",\n", ")" ] }, @@ -49,23 +49,17 @@ "source": [ "# All inputs and outputs should be located in /opt/ml/processing/\n", "script_processor.run(\n", - " code='processing.py',\n", + " code=\"processing.py\",\n", " inputs=[\n", - " ProcessingInput(\n", - " source='data/',\n", - " destination='/opt/ml/processing/input/data'\n", - " ),\n", - " ProcessingInput(\n", - " source='app/',\n", - " destination='/opt/ml/processing/input/code/app'\n", - " )\n", + " ProcessingInput(source=\"data/\", destination=\"/opt/ml/processing/input/data\"),\n", + " ProcessingInput(source=\"app/\", destination=\"/opt/ml/processing/input/code/app\"),\n", " ],\n", " outputs=[\n", " ProcessingOutput(\n", - " source='/opt/ml/processing/output/',\n", - " destination='s3://path/to/output/directory' # Change to the actual URI\n", + " source=\"/opt/ml/processing/output/\",\n", + " destination=\"s3://path/to/output/directory\", # Change to the actual URI\n", " )\n", - " ]\n", + " ],\n", ")" ] }, diff --git a/examples/caching/in_memory_tutorial.ipynb b/examples/caching/in_memory_tutorial.ipynb index 4b4f8ddb4..92a9af2d5 100644 --- a/examples/caching/in_memory_tutorial.ipynb +++ b/examples/caching/in_memory_tutorial.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# In-memory caching tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching/in_memory_tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching/in_memory_tutorial.ipynb)\n", + "# In-memory caching tutorial [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching/in_memory_tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching/in_memory_tutorial.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\caching\\in_memory_tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\caching\\in_memory_tutorial.ipynb)\n", + "\n", "\n", "\n", "This notebook shows how to use in-memory caching, which allows to cache results between runs without writing to disk. This uses the `InMemoryResultStore` and `InMemoryMetadataStore` classes.\n", @@ -45,6 +46,7 @@ "\n", "# avoid loading all available plugins for fast startup time\n", "from hamilton import registry\n", + "\n", "registry.disable_autoload()\n", "registry.load_extension(\"pandas\")\n", "\n", @@ -481,8 +483,8 @@ "metadata": {}, "outputs": [], "source": [ - "from hamilton.caching.stores.sqlite import SQLiteMetadataStore\n", "from hamilton.caching.stores.file import FileResultStore\n", + "from hamilton.caching.stores.sqlite import SQLiteMetadataStore\n", "\n", "path = \"./.persisted_cache\"\n", "on_disk_results = FileResultStore(path=path)\n", diff --git a/examples/caching/materializer_tutorial.ipynb b/examples/caching/materializer_tutorial.ipynb index 490075013..b3913f7a2 100644 --- a/examples/caching/materializer_tutorial.ipynb +++ b/examples/caching/materializer_tutorial.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Caching + materializers tutorial [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching/materializer_tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching/materializer_tutorial.ipynb)\n", + "# Caching + materializers tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching/materializer_tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching/materializer_tutorial.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\caching\\materializer_tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\caching\\materializer_tutorial.ipynb)\n", + "\n", "\n", "\n", "This notebook is a companion tutorial to the **Hamilton caching tutorial** notebook, which introduces caching more broadly.\n", @@ -475,19 +476,14 @@ ], "source": [ "dataloader_dataflow_dr = (\n", - " driver.Builder()\n", - " .with_modules(dataloader_dataflow_module)\n", - " .with_cache(path=CACHE_DIR)\n", - " .build()\n", + " driver.Builder().with_modules(dataloader_dataflow_module).with_cache(path=CACHE_DIR).build()\n", ")\n", "\n", "dataloader_dataflow_results = dataloader_dataflow_dr.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "dataloader_dataflow_results = dataloader_dataflow_dr.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", 
- " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(dataloader_dataflow_results[\"amount_per_country\"].head())\n", @@ -770,8 +766,7 @@ ")\n", "\n", "dataloader_driver_results = dataloader_driver_dr.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(dataloader_driver_results[\"amount_per_country\"].head())\n", @@ -1158,15 +1153,11 @@ ], "source": [ "load_from_dataflow_dr = (\n", - " driver.Builder()\n", - " .with_modules(load_from_dataflow_module)\n", - " .with_cache(path=CACHE_DIR)\n", - " .build()\n", + " driver.Builder().with_modules(load_from_dataflow_module).with_cache(path=CACHE_DIR).build()\n", ")\n", "\n", "load_from_dataflow_results = load_from_dataflow_dr.execute(\n", - " [\"amount_per_country\", \"save.amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"save.amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(load_from_dataflow_results[\"amount_per_country\"].head())\n", @@ -1552,15 +1543,11 @@ ], "source": [ "load_from_granular_dr = (\n", - " driver.Builder()\n", - " .with_modules(load_from_granular_module)\n", - " .with_cache(path=CACHE_DIR)\n", - " .build()\n", + " driver.Builder().with_modules(load_from_granular_module).with_cache(path=CACHE_DIR).build()\n", ")\n", "\n", "load_from_granular_results = load_from_granular_dr.execute(\n", - " [\"amount_per_country\", \"save.amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"save.amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(load_from_granular_results[\"amount_per_country\"].head())\n", @@ -1942,16 +1929,12 @@ 
"load_from_driver_dr = (\n", " driver.Builder()\n", " .with_modules(load_from_driver_module)\n", - " .with_cache(\n", - " path=CACHE_DIR,\n", - " recompute=[\"raw_data\", \"save.amount_per_country\"]\n", - " )\n", + " .with_cache(path=CACHE_DIR, recompute=[\"raw_data\", \"save.amount_per_country\"])\n", " .build()\n", ")\n", "\n", "load_from_driver_results = load_from_driver_dr.execute(\n", - " [\"amount_per_country\", \"save.amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"save.amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(load_from_driver_results[\"amount_per_country\"].head())\n", @@ -2265,11 +2248,11 @@ " id=\"saved_data\",\n", " dependencies=[\"amount_per_country\"],\n", " path=\"saved_data.parquet\",\n", - " )\n", + " ),\n", " )\n", " .with_cache(\n", " path=CACHE_DIR,\n", - " recompute=[\"raw_data\",\"saved_data\"],\n", + " recompute=[\"raw_data\", \"saved_data\"],\n", " default_loader_behavior=\"disable\",\n", " )\n", " .build()\n", @@ -2457,8 +2440,7 @@ ], "source": [ "static_from_results = static_from_dr.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(static_from_results[\"amount_per_country\"].head())\n", @@ -2586,10 +2568,7 @@ "dynamic_from_dr = (\n", " driver.Builder()\n", " .with_modules(from_module)\n", - " .with_cache(\n", - " path=CACHE_DIR,\n", - " recompute=[\"raw_data\", \"saved_data\"]\n", - " )\n", + " .with_cache(path=CACHE_DIR, recompute=[\"raw_data\", \"saved_data\"])\n", " .build()\n", ")\n", "dynamic_from_dr" @@ -2784,7 +2763,7 @@ " path=\"saved_data.parquet\",\n", " ),\n", " additional_vars=[\"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " inputs={\"cutoff_date\": \"2024-09-01\"},\n", ")\n", "print()\n", 
"print(dynamic_from_results[\"amount_per_country\"].head())\n", diff --git a/examples/caching/tutorial.ipynb b/examples/caching/tutorial.ipynb index c4d411663..1f2f1ca92 100644 --- a/examples/caching/tutorial.ipynb +++ b/examples/caching/tutorial.ipynb @@ -15,7 +15,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Hamilton caching tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching/tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching/tutorial.ipynb)\n", + "# Hamilton caching tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching/tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching/tutorial.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\caching\\tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\caching\\tutorial.ipynb)\n", + "\n", "\n", "In Hamilton, **caching** broadly refers to \"reusing results from previous executions to skip redundant computation\". If you change code or pass new data, it will automatically determine which results can be reused and which nodes need to be re-executed. 
This improves execution speed and reduces resource usage (computation, API credits, etc.).\n", "\n", @@ -787,8 +788,7 @@ "adding_node_dr = driver.Builder().with_modules(adding_node_module).with_cache().build()\n", "\n", "adding_node_results = adding_node_dr.execute(\n", - " [\"processed_data\", \"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"processed_data\", \"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(adding_node_results[\"amount_per_country\"].head())\n", @@ -948,8 +948,7 @@ "changing_inputs_dr = driver.Builder().with_modules(adding_node_module).with_cache().build()\n", "\n", "changing_inputs_results_1 = changing_inputs_dr.execute(\n", - " [\"processed_data\", \"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-11\"}\n", + " [\"processed_data\", \"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-11\"}\n", ")\n", "print()\n", "print(changing_inputs_results_1[\"amount_per_country\"].head())\n", @@ -1093,8 +1092,7 @@ ], "source": [ "changing_inputs_results_2 = changing_inputs_dr.execute(\n", - " [\"processed_data\", \"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-05\"}\n", + " [\"processed_data\", \"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-05\"}\n", ")\n", "print()\n", "print(changing_inputs_results_2[\"amount_per_country\"].head())\n", @@ -1323,8 +1321,7 @@ "changing_code_dr_1 = driver.Builder().with_modules(changing_code_module).with_cache().build()\n", "\n", "changing_code_results_1 = changing_code_dr_1.execute(\n", - " [\"processed_data\", \"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"processed_data\", \"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(changing_code_results_1[\"amount_per_country\"].head())\n", @@ -1511,7 +1508,9 @@ "source": [ "changing_code_dr_2 = 
driver.Builder().with_modules(changing_code_module_2).with_cache().build()\n", "\n", - "changing_code_results_2 = changing_code_dr_2.execute([\"processed_data\",\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"})\n", + "changing_code_results_2 = changing_code_dr_2.execute(\n", + " [\"processed_data\", \"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", + ")\n", "print()\n", "print(changing_code_results_2[\"amount_per_country\"].head())\n", "print()\n", @@ -1768,7 +1767,9 @@ "source": [ "changing_external_dr = driver.Builder().with_modules(changing_external_module).with_cache().build()\n", "\n", - "changing_external_results = changing_external_dr.execute([\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"})\n", + "changing_external_results = changing_external_dr.execute(\n", + " [\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", + ")\n", "print()\n", "print(changing_external_results[\"amount_per_country\"].head())\n", "print()\n", @@ -1909,9 +1910,16 @@ } ], "source": [ - "changing_external_with_cache_dr = driver.Builder().with_modules(changing_external_module).with_cache(recompute=[\"raw_data\"]).build()\n", + "changing_external_with_cache_dr = (\n", + " driver.Builder()\n", + " .with_modules(changing_external_module)\n", + " .with_cache(recompute=[\"raw_data\"])\n", + " .build()\n", + ")\n", "\n", - "changing_external_with_cache_results = changing_external_with_cache_dr.execute([\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"})\n", + "changing_external_with_cache_results = changing_external_with_cache_dr.execute(\n", + " [\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", + ")\n", "print()\n", "print(changing_external_with_cache_results[\"amount_per_country\"].head())\n", "print()\n", @@ -2101,15 +2109,11 @@ ], "source": [ "changing_external_decorator_dr = (\n", - " driver.Builder()\n", - " .with_modules(changing_external_decorator_module)\n", - " 
.with_cache()\n", - " .build()\n", + " driver.Builder().with_modules(changing_external_decorator_module).with_cache().build()\n", ")\n", "\n", "changing_external_decorator_results = changing_external_decorator_dr.execute(\n", - " [\"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(changing_external_decorator_results[\"amount_per_country\"].head())\n", @@ -2281,8 +2285,7 @@ ")\n", "\n", "recompute_all_results = recompute_all_dr.execute(\n", - " [\"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(recompute_all_results[\"amount_per_country\"].head())\n", @@ -2442,8 +2445,7 @@ ")\n", "\n", "default_behavior_results = default_behavior_dr.execute(\n", - " [\"amount_per_country\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(default_behavior_results[\"amount_per_country\"].head())\n", @@ -2839,16 +2841,10 @@ } ], "source": [ - "materializers_dr = (\n", - " driver.Builder()\n", - " .with_modules(materializers_module)\n", - " .with_cache()\n", - " .build()\n", - ")\n", + "materializers_dr = driver.Builder().with_modules(materializers_module).with_cache().build()\n", "\n", "materializers_results = materializers_dr.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(materializers_results[\"amount_per_country\"].head())\n", @@ -3025,8 +3021,7 @@ ], "source": [ "materializers_results = materializers_dr.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", 
\"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(materializers_results[\"amount_per_country\"].head())\n", @@ -3226,16 +3221,12 @@ "materializers_dr_2 = (\n", " driver.Builder()\n", " .with_modules(materializers_module)\n", - " .with_cache(\n", - " default_loader_behavior=\"recompute\",\n", - " default_saver_behavior=\"disable\"\n", - " )\n", + " .with_cache(default_loader_behavior=\"recompute\", default_saver_behavior=\"disable\")\n", " .build()\n", ")\n", "\n", "materializers_results_2 = materializers_dr_2.execute(\n", - " [\"amount_per_country\", \"saved_data\"],\n", - " inputs={\"cutoff_date\": \"2024-09-01\"}\n", + " [\"amount_per_country\", \"saved_data\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", ")\n", "print()\n", "print(materializers_results_2[\"amount_per_country\"].head())\n", @@ -3467,7 +3458,9 @@ "source": [ "cache_format_dr = driver.Builder().with_modules(cache_format_module).with_cache().build()\n", "\n", - "cache_format_results = cache_format_dr.execute([\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"})\n", + "cache_format_results = cache_format_dr.execute(\n", + " [\"amount_per_country\"], inputs={\"cutoff_date\": \"2024-09-01\"}\n", + ")\n", "print()\n", "print(cache_format_results[\"amount_per_country\"].head())\n", "print()\n", @@ -3500,8 +3493,12 @@ } ], "source": [ - "data_version = cache_format_dr.cache.data_versions[cache_format_dr.cache.last_run_id][\"processed_data\"]\n", - "parquet_path = cache_format_dr.cache.result_store._path_from_data_version(data_version).with_suffix(\".parquet\")\n", + "data_version = cache_format_dr.cache.data_versions[cache_format_dr.cache.last_run_id][\n", + " \"processed_data\"\n", + "]\n", + "parquet_path = cache_format_dr.cache.result_store._path_from_data_version(data_version).with_suffix(\n", + " \".parquet\"\n", + ")\n", "parquet_path.exists()" ] }, @@ -3795,7 +3792,9 @@ "metadata": {}, "outputs": [], "source": [ - "manual_path_dr = 
driver.Builder().with_modules(cache_format_module).with_cache(path=\"./my_other_cache\").build()" + "manual_path_dr = (\n", + " driver.Builder().with_modules(cache_format_module).with_cache(path=\"./my_other_cache\").build()\n", + ")" ] }, { @@ -4023,7 +4022,9 @@ } ], "source": [ - "for dep_name, dependency_data_version in decode_key(cache_key)[\"dependencies_data_versions\"].items():\n", + "for dep_name, dependency_data_version in decode_key(cache_key)[\n", + " \"dependencies_data_versions\"\n", + "].items():\n", " dep_result = interactive_dr.cache.result_store.get(dependency_data_version)\n", " print(dep_name)\n", " print(dep_result)\n", diff --git a/examples/caching_nodes/caching.ipynb b/examples/caching_nodes/caching.ipynb index a5d96087d..48446b640 100644 --- a/examples/caching_nodes/caching.ipynb +++ b/examples/caching_nodes/caching.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# First-class Caching in Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching_nodes/caching.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching_nodes/caching.ipynb)\n" + "# First-class Caching in Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching_nodes/caching.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching_nodes/caching.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\caching_nodes\\caching.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\caching_nodes\\caching.ipynb)\n" ] }, { @@ -23,9 +23,8 @@ "metadata": {}, "outputs": [], "source": [ - "from pprint import pprint\n", - "\n", "from hamilton import registry\n", + "\n", "registry.disable_autoload()\n", "registry.load_extension(\"pandas\")\n", "\n", @@ -199,12 +198,7 @@ "source": [ "from hamilton import driver\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(primitives)\n", - " .with_cache()\n", - " .build()\n", - ")" + "dr = driver.Builder().with_modules(primitives).with_cache().build()" ] }, { @@ -221,11 +215,7 @@ } ], "source": [ - "results = dr.execute(\n", - " [\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\"],\n", - " inputs=dict(D=True),\n", - " overrides=dict(B=4)\n", - ")\n", + "results = dr.execute([\"A\", \"B\", \"C\", \"D\", \"E\", \"F\", \"G\"], inputs=dict(D=True), overrides=dict(B=4))\n", "print(results.keys())" ] }, diff --git a/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb b/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb index 848194842..8b37a9c5a 100644 --- a/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb +++ b/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Caching Nodes with Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb)\n" + "# Caching Nodes with Hamilton [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching_nodes/caching_graph_adapter/caching_nodes.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\caching_nodes\\caching_graph_adapter\\caching_nodes.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\caching_nodes\\caching_graph_adapter\\caching_nodes.ipynb)\n" ] }, { @@ -41,6 +41,7 @@ "spends_data = [10, 10, 20, 40, 40, 50]\n", "signups_data = [1, 10, 50, 100, 200, 400]\n", "\n", + "\n", "@tag(cache=\"parquet\")\n", "def spend() -> pd.Series:\n", " \"\"\"Emulates potentially expensive data extraction.\"\"\"\n", @@ -50,7 +51,7 @@ "@tag(cache=\"parquet\")\n", "def signups() -> pd.Series:\n", " \"\"\"Emulates potentially expensive data extraction.\"\"\"\n", - " return pd.Series(signups_data)\n" + " return pd.Series(signups_data)" ] }, { @@ -89,7 +90,7 @@ "\n", "def spend_zero_mean_unit_variance(spend_zero_mean: pd.Series, spend_std_dev: float) -> pd.Series:\n", " \"\"\"Function showing one way to make spend have zero mean and unit variance.\"\"\"\n", - " return spend_zero_mean / spend_std_dev\n" + " return spend_zero_mean / spend_std_dev" ] }, { @@ -103,19 +104,16 @@ "# Also note, that using a temporary function module does not work for scaling onto Ray, Dask, or Pandas on Spark.\n", "from hamilton import ad_hoc_utils\n", "\n", - "\n", - "data_loaders = ad_hoc_utils.create_temporary_module(\n", - " spend, signups, module_name=\"data_loaders\"\n", - ")\n", + "data_loaders = ad_hoc_utils.create_temporary_module(spend, 
signups, module_name=\"data_loaders\")\n", "\n", "business_logic = ad_hoc_utils.create_temporary_module(\n", - " avg_3wk_spend, \n", + " avg_3wk_spend,\n", " spend_per_signup,\n", " spend_mean,\n", " spend_zero_mean,\n", " spend_std_dev,\n", - " spend_zero_mean_unit_variance, \n", - " module_name=\"business_logic\"\n", + " spend_zero_mean_unit_variance,\n", + " module_name=\"business_logic\",\n", ")" ] }, @@ -125,10 +123,10 @@ "metadata": {}, "outputs": [], "source": [ - "from hamilton import base, driver\n", - "from hamilton.experimental import h_cache\n", "import pathlib\n", - "import sys" + "\n", + "from hamilton import base, driver\n", + "from hamilton.experimental import h_cache" ] }, { @@ -185,7 +183,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Lets change the source values for our data loaders. \n", + "# Lets change the source values for our data loaders.\n", "\n", "spends_data = [i * 1000 for i in spends_data]\n", "signups_data = [i * 1000 for i in spends_data]" @@ -219,7 +217,7 @@ "# CachingGraphAdapter handles the actual caching during exection.\n", "adapter = h_cache.CachingGraphAdapter(cache_path, base.PandasDataFrameResult())\n", "\n", - "# Hamilton caches are valid accross new instances of the driver. 
\n", + "# Hamilton caches are valid accross new instances of the driver.\n", "dr = driver.Driver(initial_columns, business_logic, data_loaders, adapter=adapter)\n", "output_columns = [\n", " \"spend\",\n", @@ -255,7 +253,9 @@ "source": [ "# Now lets force hamilton to recompute the cached data loaders.\n", "\n", - "adapter = h_cache.CachingGraphAdapter(cache_path, base.PandasDataFrameResult(), force_compute=set([\"spend\", \"signups\"]))\n", + "adapter = h_cache.CachingGraphAdapter(\n", + " cache_path, base.PandasDataFrameResult(), force_compute=set([\"spend\", \"signups\"])\n", + ")\n", "dr = driver.Driver(initial_columns, business_logic, data_loaders, adapter=adapter)\n", "output_columns = [\n", " \"spend\",\n", diff --git a/examples/caching_nodes/diskcache_adapter/notebook.ipynb b/examples/caching_nodes/diskcache_adapter/notebook.ipynb index 799bb3b57..4e9dde945 100644 --- a/examples/caching_nodes/diskcache_adapter/notebook.ipynb +++ b/examples/caching_nodes/diskcache_adapter/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Diskcache tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching_nodes/diskcache_adapter/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching_nodes/diskcache_adapter/notebook.ipynb)\n" + "# Diskcache tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/caching_nodes/diskcache_adapter/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/caching_nodes/diskcache_adapter/notebook.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\caching_nodes\\diskcache_adapter\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\caching_nodes\\diskcache_adapter\\notebook.ipynb)\n" ] }, { @@ -23,10 +23,10 @@ "metadata": {}, "outputs": [], "source": [ - "from hamilton import driver\n", - "from hamilton.plugins import h_diskcache\n", + "import functions\n", "\n", - "import functions" + "from hamilton import driver\n", + "from hamilton.plugins import h_diskcache" ] }, { @@ -60,12 +60,7 @@ } ], "source": [ - "dr = (\n", - " driver.Builder()\n", - " .with_modules(functions)\n", - " .with_adapters(h_diskcache.DiskCacheAdapter())\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(functions).with_adapters(h_diskcache.DiskCacheAdapter()).build()\n", "# if you ran `run.py`, you should see the nodes being\n", "# read from cache\n", "results = dr.execute([\"C\"], inputs=dict(external=10))" diff --git a/examples/contrib/notebook.ipynb b/examples/contrib/notebook.ipynb index 9339e9e7f..736f31585 100644 --- a/examples/contrib/notebook.ipynb +++ b/examples/contrib/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Contribution tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/contrib/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/contrib/notebook.ipynb)\n" + "# Contribution tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/contrib/notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/contrib/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\contrib\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\contrib\\notebook.ipynb)\n" ] }, { @@ -23,7 +23,7 @@ "metadata": {}, "outputs": [], "source": [ - "from hamilton import driver, dataflows" + "from hamilton import dataflows, driver" ] }, { @@ -472,12 +472,7 @@ } ], "source": [ - "dr = (\n", - " driver.Builder()\n", - " .with_modules(xgboost_optuna)\n", - " .with_config(dict(task=\"classification\"))\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(xgboost_optuna).with_config(dict(task=\"classification\")).build()\n", "\n", "dr.display_all_functions()" ] @@ -781,7 +776,9 @@ "\n", "# Load the Boston Housing dataset (regression example)\n", "data = load_breast_cancer()\n", - "X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.2, random_state=42)\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " data.data, data.target, test_size=0.2, random_state=42\n", + ")\n", "\n", "inputs = dict(\n", " X_train=X_train,\n", @@ -1296,12 +1293,7 @@ } ], "source": [ - "dr = (\n", - " driver.Builder()\n", - " .with_modules(xgboost_optuna)\n", - " .with_config(dict(task=\"classification\"))\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(xgboost_optuna).with_config(dict(task=\"classification\")).build()\n", "\n", "dr.display_all_functions()" ] diff --git a/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb b/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb index ed36df07d..872390bac 100644 --- 
a/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb +++ b/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb @@ -16,7 +16,7 @@ "id": "40ebfe63", "metadata": {}, "source": [ - "# Conttribution: Translate to Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb)\n" + "# Conttribution: Translate to Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/contrib/notebooks/dagworks-translate_to_hamilton.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\contrib\\notebooks\\dagworks-translate_to_hamilton.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\contrib\\notebooks\\dagworks-translate_to_hamilton.ipynb)\n" ] }, { @@ -28,7 +28,7 @@ }, "outputs": [], "source": [ - "from hamilton import driver, dataflows" + "from hamilton import driver" ] }, { @@ -82,6 +82,7 @@ "source": [ "# get the code\n", "import inspect\n", + "\n", "user_code = inspect.getsource(my_func)" ] }, @@ -134,12 +135,7 @@ ], "source": [ "# create a driver\n", - "dr = (\n", - " driver.Builder()\n", - " .with_config({})\n", - " .with_modules(translate_to_hamilton)\n", - " .build()\n", - ")" + "dr = 
driver.Builder().with_config({}).with_modules(translate_to_hamilton).build()" ] }, { @@ -156,8 +152,8 @@ "outputs": [], "source": [ "result = dr.execute(\n", - " [\"code_segments\", \"translated_code_response\"], # request these as outputs\n", - " inputs={\"user_code\": user_code, \"model_name\": \"gpt-4-1106-preview\"}\n", + " [\"code_segments\", \"translated_code_response\"], # request these as outputs\n", + " inputs={\"user_code\": user_code, \"model_name\": \"gpt-4-1106-preview\"},\n", ")" ] }, diff --git a/examples/dagster/hamilton_code/notebook.ipynb b/examples/dagster/hamilton_code/notebook.ipynb index e3e1958c3..f9f14613a 100644 --- a/examples/dagster/hamilton_code/notebook.ipynb +++ b/examples/dagster/hamilton_code/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Dagster comparison [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dagster/hamilton_code/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dagster/hamilton_code/notebook.ipynb)\n", + "# Dagster comparison [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dagster/hamilton_code/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dagster/hamilton_code/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\dagster\\hamilton_code\\notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\dagster\\hamilton_code\\notebook.ipynb)\n", + "\n", "\n", "\n", "This notebook shows how you can use Hamilton in a notebook for interactive development. The code is similar to the content of `run.py`.\n", @@ -35,13 +36,11 @@ "metadata": {}, "outputs": [], "source": [ + "import dataflow # dataflow definition\n", "from IPython.display import display\n", - "from hamilton import driver\n", - "from hamilton.io.materialization import to\n", - "from hamilton.plugins import matplotlib_extensions \n", "from mock_api import DataGeneratorResource\n", "\n", - "import dataflow # dataflow definition" + "from hamilton import driver" ] }, { diff --git a/examples/dask/community_demo/demo_day_notebook.ipynb b/examples/dask/community_demo/demo_day_notebook.ipynb index 671d7f422..17f19e792 100644 --- a/examples/dask/community_demo/demo_day_notebook.ipynb +++ b/examples/dask/community_demo/demo_day_notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Dask tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dask/community_demo/demo_day_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dask/community_demo/demo_day_notebook.ipynb)\n" + "# Dask tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dask/community_demo/demo_day_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dask/community_demo/demo_day_notebook.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\dask\\community_demo\\demo_day_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\dask\\community_demo\\demo_day_notebook.ipynb)\n" ] }, { @@ -26,9 +26,7 @@ "# Cell 1 - import the things you need\n", "import logging\n", "import sys\n", - "import time\n", "\n", - "import numpy as np\n", "import pandas as pd\n", "\n", "from hamilton import base, driver\n", @@ -62,9 +60,11 @@ ], "source": [ "# Cell 2 - import modules to create part of the DAG from\n", - "import my_functions\n", "import importlib\n", - "importlib.reload(my_functions) # rerun this cell if you update `my_functions.py`" + "\n", + "import my_functions\n", + "\n", + "importlib.reload(my_functions) # rerun this cell if you update `my_functions.py`" ] }, { @@ -391,9 +391,13 @@ ], "source": [ "# Cell 5 - we can visualize just the execution path\n", - "dr.visualize_execution(output_columns,\n", - " inputs={\"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", - " \"spend\": pd.Series([10, 10, 20, 40, 40, 50])}) # we pass None to not save the image to file." + "dr.visualize_execution(\n", + " output_columns,\n", + " inputs={\n", + " \"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", + " \"spend\": pd.Series([10, 10, 20, 40, 40, 50]),\n", + " },\n", + ") # we pass None to not save the image to file." 
] }, { @@ -497,8 +501,9 @@ ], "source": [ "# Cell 6 - we can visualize the path of execution between two functions\n", - "dr.visualize_path_between(\"spend_mean\", \"spend_zero_mean_unit_variance\",\n", - " strict_path_visualization=False)" + "dr.visualize_path_between(\n", + " \"spend_mean\", \"spend_zero_mean_unit_variance\", strict_path_visualization=False\n", + ")" ] }, { @@ -627,11 +632,15 @@ "%%time\n", "# Cell 7 - we can execute the DAG\n", "# let's create the dataframe!\n", - "df = dr.execute(output_columns, \n", - " inputs={\"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", - " \"spend\": pd.Series([10, 10, 20, 40, 40, 50])})\n", + "df = dr.execute(\n", + " output_columns,\n", + " inputs={\n", + " \"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", + " \"spend\": pd.Series([10, 10, 20, 40, 40, 50]),\n", + " },\n", + ")\n", "# it should take 9 seconds to compute due to the three 3 second sleeps.\n", - "df " + "df" ] }, { @@ -687,9 +696,12 @@ "# Cell 8 - Set up dask locally\n", "# Dask graph adapter -- let's distribute the functions!\n", "import logging\n", - "from hamilton import base, driver\n", - "from hamilton.plugins import h_dask\n", + "\n", "from dask.distributed import Client, LocalCluster\n", + "\n", + "from hamilton import driver\n", + "from hamilton.plugins import h_dask\n", + "\n", "logger = logging.getLogger(\"notebook_logger\")\n", "# Setup a local cluster.\n", "# By default this sets up 1 worker per core\n", @@ -831,9 +843,12 @@ "%%time\n", "# Cell 10 - run the DAG again, but this time with dask\n", "df2 = dr2.execute(\n", - " output_columns, \n", - " inputs={\"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", - " \"spend\": pd.Series([10, 10, 20, 40, 40, 50])})\n", + " output_columns,\n", + " inputs={\n", + " \"signups\": pd.Series([1, 10, 50, 100, 200, 400]),\n", + " \"spend\": pd.Series([10, 10, 20, 40, 40, 50]),\n", + " },\n", + ")\n", "df2" ] }, diff --git a/examples/dask/hello_world/business_logic.py 
b/examples/dask/hello_world/business_logic.py index b4f559c0d..8255669a3 120000 --- a/examples/dask/hello_world/business_logic.py +++ b/examples/dask/hello_world/business_logic.py @@ -1 +1 @@ -../../hello_world/my_functions.py \ No newline at end of file +../../hello_world/my_functions.py diff --git a/examples/dask/hello_world/notebook.ipynb b/examples/dask/hello_world/notebook.ipynb index 7caaf82c9..ab8ed6b52 100644 --- a/examples/dask/hello_world/notebook.ipynb +++ b/examples/dask/hello_world/notebook.ipynb @@ -19,7 +19,8 @@ } }, "source": [ - "# Hello world tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dask/hello_world/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dask/hello_world/notebook.ipynb)\n", + "# Hello world tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dask/hello_world/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dask/hello_world/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\dask\\hello_world\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\dask\\hello_world\\notebook.ipynb)\n", + "\n", "\n", "\n", "Uncomment and run the cell below if you are in a Google Colab environment. 
It will:\n", @@ -107,8 +108,6 @@ "source": [ "# Import modules\n", "\n", - "import pandas as pd\n", - "from dask import dataframe\n", "from dask.distributed import (\n", " Client,\n", " LocalCluster,\n", @@ -263,7 +262,7 @@ "# Set up the local Dask cluster, adapter, and driver.\n", "\n", "cluster = LocalCluster()\n", - "client = Client(cluster)\n", + "client = Client(cluster)\n", "\n", "print(client.cluster)\n", "\n", diff --git a/examples/data_loaders/data_loaders.ipynb b/examples/data_loaders/data_loaders.ipynb index a905e5226..82911bfde 100644 --- a/examples/data_loaders/data_loaders.ipynb +++ b/examples/data_loaders/data_loaders.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Data Loaders Overview [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/data_loaders/data_loaders.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/data_loaders/data_loaders.ipynb)\n", + "# Data Loaders Overview [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/data_loaders/data_loaders.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/data_loaders/data_loaders.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\data_loaders\\data_loaders.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\data_loaders\\data_loaders.ipynb)\n", + "\n", "\n", "\n", "This example demonstrates how to build scalable and maintainable Extract-Transform-Load (ETL) 
pipelines using Hamilton, focusing on the Extract phase. Hamilton allows for easy switching between different data sources without the need for cluttered if-else statements, making the dataflow modular and easier to maintain.\n", @@ -27,7 +28,7 @@ "\n", "- Mock Data Loader: Generates mock data on the fly, useful for unit testing or quick iterations.\n", "- CSV Data Loader: Reads data from CSV files, useful for ad-hoc research.\n", - "- DuckDB Data Loader: Loads data from a DuckDB database, representing more production-ready data pipelines.\n" + "- DuckDB Data Loader: Loads data from a DuckDB database, representing more production-ready data pipelines." ] }, { @@ -80,8 +81,9 @@ } ], "source": [ - "from hamilton import base\n", "import hamilton.driver\n", + "from hamilton import base\n", + "\n", "%load_ext hamilton.plugins.jupyter_magic" ] }, @@ -1604,15 +1606,18 @@ "source": [ "# Execution for duckdb data\n", "\n", - "driver = (hamilton.driver.Builder()\n", - " .with_modules(load_data_duckdb, prep_data)\n", - " .with_adapters(base.PandasDataFrameResult())\n", - " .build()\n", + "driver = (\n", + " hamilton.driver.Builder()\n", + " .with_modules(load_data_duckdb, prep_data)\n", + " .with_adapters(base.PandasDataFrameResult())\n", + " .build()\n", ")\n", "# Uncomment the print function below to display the execution result when have manually handled connections/whatnot for duckdb\n", - "# print(driver.execute(VARS)) \n", + "# print(driver.execute(VARS))\n", "\n", - "duckdb_execution_graph = driver.visualize_execution(VARS, inputs={\"db_path\": \"./test_data/database.duckdb\"})\n", + "duckdb_execution_graph = driver.visualize_execution(\n", + " VARS, inputs={\"db_path\": \"./test_data/database.duckdb\"}\n", + ")\n", "display(duckdb_execution_graph)" ] }, @@ -2194,10 +2199,11 @@ "source": [ "# Execution for csv data\n", "\n", - "driver = (hamilton.driver.Builder()\n", - " .with_modules(load_data_csv, prep_data)\n", - " .with_adapters(base.PandasDataFrameResult())\n", - 
" .build()\n", + "driver = (\n", + " hamilton.driver.Builder()\n", + " .with_modules(load_data_csv, prep_data)\n", + " .with_adapters(base.PandasDataFrameResult())\n", + " .build()\n", ")\n", "print(driver.execute(VARS))\n", "\n", @@ -2665,10 +2671,11 @@ "source": [ "# Execution for mock data\n", "\n", - "driver = (hamilton.driver.Builder()\n", - " .with_modules(load_data_mock, prep_data)\n", - " .with_adapters(base.PandasDataFrameResult())\n", - " .build()\n", + "driver = (\n", + " hamilton.driver.Builder()\n", + " .with_modules(load_data_mock, prep_data)\n", + " .with_adapters(base.PandasDataFrameResult())\n", + " .build()\n", ")\n", "print(driver.execute(VARS))\n", "\n", diff --git a/examples/dlt/dlt_plugin.ipynb b/examples/dlt/dlt_plugin.ipynb index f3e34577c..4aeb57895 100644 --- a/examples/dlt/dlt_plugin.ipynb +++ b/examples/dlt/dlt_plugin.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# dlt plugin for Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dlt/dlt_plugin.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dlt/dlt_plugin.ipynb)\n", + "# dlt plugin for Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dlt/dlt_plugin.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dlt/dlt_plugin.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\dlt\\dlt_plugin.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\dlt\\dlt_plugin.ipynb)\n", + "\n", "\n", "This notebook shows how to use Hamilton [materializers](https://hamilton.apache.org/concepts/materialization/) to move data between Hamilton and dlt.\n", "\n", @@ -33,9 +34,9 @@ "%load_ext hamilton.plugins.jupyter_magic\n", "\n", "import dlt\n", + "\n", "from hamilton import driver\n", - "from hamilton.io.materialization import to, from_\n", - "from hamilton.plugins import dlt_extensions" + "from hamilton.io.materialization import from_, to" ] }, { @@ -414,13 +415,14 @@ "@dlt.source\n", "def mock_source():\n", " iterable_data = [{\"col\": 1}, {\"col\": 2}, {\"col\": 3}] * 100\n", - " \n", + "\n", " @dlt.resource\n", " def mock_resource():\n", " yield from iterable_data\n", - " \n", + "\n", " yield mock_resource\n", - " \n", + "\n", + "\n", "my_mock_source = mock_source()" ] }, @@ -459,10 +461,7 @@ " ),\n", "]\n", "\n", - "metadata, _ = dr.materialize(\n", - " *materializers,\n", - " additional_vars=[\"print_df_head\"]\n", - ")" + "metadata, _ = dr.materialize(*materializers, additional_vars=[\"print_df_head\"])" ] } ], diff --git a/examples/dlt/notebook.ipynb b/examples/dlt/notebook.ipynb index a399bd088..ddf971978 100644 --- a/examples/dlt/notebook.ipynb +++ b/examples/dlt/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Slack Summaries [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dlt/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dlt/notebook.ipynb)\n", + "# Slack Summaries [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/dlt/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/dlt/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\dlt\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\dlt\\notebook.ipynb)\n", + "\n", "\n", "This notebook shows how to ingest Slack messages and generate threads summaries.\n", "\n", @@ -51,7 +52,7 @@ "\n", "slack_pipeline = dlt.pipeline(\n", " pipeline_name=\"slack\",\n", - " destination='duckdb',\n", + " destination=\"duckdb\",\n", " dataset_name=\"slack_data\",\n", " full_refresh=True,\n", ")\n", @@ -327,9 +328,10 @@ "metadata": {}, "outputs": [], "source": [ - "from hamilton import driver\n", "import jupyter_transform\n", "\n", + "from hamilton import driver\n", + "\n", "dr = (\n", " driver.Builder()\n", " .enable_dynamic_execution(allow_experimental_mode=True)\n", @@ -357,7 +359,7 @@ "inputs = dict(\n", " pipeline=slack_pipeline,\n", " selected_channels=[\"general\", \"dlt\"],\n", - ") \n", + ")\n", "final_vars = [\"threads.with_aggregate_thread\"]\n", "\n", "results = dr.execute(final_vars, inputs=inputs)\n", @@ -380,8 +382,8 @@ "metadata": {}, "outputs": [], "source": [ - "import os \n", "import getpass\n", + "import os\n", "\n", "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass(\"Enter your OpenAI key\")" ] @@ -392,7 +394,7 @@ "metadata": {}, "outputs": [], "source": [ - "final_vars = [\"threads\"] # replace by `\"insert_threads\"` to directly store results\n", + "final_vars = [\"threads\"] # replace by `\"insert_threads\"` to directly store results\n", "results = 
dr.execute(final_vars, inputs=inputs)\n", "df2 = results[\"threads\"].to_pandas()\n", "\n", diff --git a/examples/due_date_probabilities/notebook.ipynb b/examples/due_date_probabilities/notebook.ipynb index 6c4879bc3..b6bc06494 100644 --- a/examples/due_date_probabilities/notebook.ipynb +++ b/examples/due_date_probabilities/notebook.ipynb @@ -16,7 +16,8 @@ "id": "5b2f7a6c", "metadata": {}, "source": [ - "# Modeling due date probabilities [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/due_date_probabilities/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/due_date_probabilities/notebook.ipynb)\n", + "# Modeling due date probabilities [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/due_date_probabilities/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/due_date_probabilities/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\due_date_probabilities\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\due_date_probabilities\\notebook.ipynb)\n", + "\n", "\n", "\n", "In this notebook we model the probability of going into labor, given the start date of a pregnancy. 
We do a few things:\n", @@ -65,18 +66,19 @@ "metadata": {}, "outputs": [], "source": [ - "import calendar\n", - "from IPython.display import HTML,display_html \n", + "import datetime\n", "from calendar import HTMLCalendar\n", + "\n", "import pandas as pd\n", - "import datetime\n", + "from IPython.display import HTML, display_html\n", + "\n", "\n", "class HighlightedCalendar(HTMLCalendar):\n", " def __init__(self, highlight=[], normalized_scale=[], *args, **kwargs):\n", " super().__init__(*args, **kwargs)\n", " self._highlight = highlight\n", " self._normalized_scale = normalized_scale\n", - " \n", + "\n", " def formatday(self, day, weekday):\n", " \"\"\"\n", " Return a day as a table cell.\n", @@ -84,27 +86,33 @@ " if day in self._highlight:\n", " index = self._highlight.index(day)\n", " if len(self._normalized_scale) > 0:\n", - " alpha = self._normalized_scale[index]*.3\n", + " alpha = self._normalized_scale[index] * 0.3\n", " else:\n", " alpha = 0.3\n", " return f'{day}'\n", " else:\n", " return super().formatday(day, weekday)\n", "\n", + "\n", "def view_date_range(date_range: pd.Series, scale: pd.Series = None):\n", " # normalize\n", " if scale is not None:\n", " max_scale = max(scale)\n", " min_scale = min(scale)\n", - " scale = min_scale + (scale-min_scale)/max_scale\n", + " scale = min_scale + (scale - min_scale) / max_scale\n", " else:\n", " scale = 1\n", " html = \"\"\n", " month_year_combos = {(date.year, date.month) for date in date_range}\n", " combined_df = pd.DataFrame(dict(dates=date_range, scale=scale))\n", " for year, month in month_year_combos:\n", - " filtered_df = combined_df[(combined_df.dates.dt.year == year) & (combined_df.dates.dt.month == month)]\n", - " cal = HighlightedCalendar(highlight=list([item.day for item in filtered_df.dates]), normalized_scale=list(filtered_df.scale))\n", + " filtered_df = combined_df[\n", + " (combined_df.dates.dt.year == year) & (combined_df.dates.dt.month == month)\n", + " ]\n", + " cal = 
HighlightedCalendar(\n", + " highlight=list([item.day for item in filtered_df.dates]),\n", + " normalized_scale=list(filtered_df.scale),\n", + " )\n", " html += f\"\"\n", "\n", " html += \"
{cal.formatmonth(year, month)}
\"\n", @@ -133,7 +141,7 @@ "# Start date of the pregnancy (first of the last period)\n", "PREGNANCY_START_DATE = datetime.datetime.strptime(\"20231012\", \"%Y%m%d\")\n", "\n", - "# Today -- set to the past if you don't want a true conditional probability \n", + "# Today -- set to the past if you don't want a true conditional probability\n", "# TODAY = datetime.datetime.today()\n", "TODAY = datetime.datetime.strptime(\"20240713\", \"%Y%m%d\")" ] @@ -557,16 +565,21 @@ } ], "source": [ - "from hamilton import driver, base\n", - "from hamilton import log_setup\n", - "\n", + "from hamilton import base, driver\n", "\n", - "dr = driver.Builder().with_modules(base_dates, probabilities).with_adapters(base.PandasDataFrameResult()).build()\n", + "dr = (\n", + " driver.Builder()\n", + " .with_modules(base_dates, probabilities)\n", + " .with_adapters(base.PandasDataFrameResult())\n", + " .build()\n", + ")\n", "delivery_probabilities = dr.execute(\n", - " [\"possible_dates\", \"probability_before_date\", \"probability_on_date\"], \n", - " inputs={\"start_date\" : PREGNANCY_START_DATE, \"current_date\": TODAY}\n", + " [\"possible_dates\", \"probability_before_date\", \"probability_on_date\"],\n", + " inputs={\"start_date\": PREGNANCY_START_DATE, \"current_date\": TODAY},\n", ")\n", - "view_date_range(delivery_probabilities[\"possible_dates\"], delivery_probabilities[\"probability_on_date\"])" + "view_date_range(\n", + " delivery_probabilities[\"possible_dates\"], delivery_probabilities[\"probability_on_date\"]\n", + ")" ] }, { @@ -594,13 +607,17 @@ "\n", "fig, axes = plt.subplots(nrows=2, ncols=1, figsize=(10, 8)) # Adjust the figure size as necessary\n", "\n", - "sns.lineplot(ax=axes[0], data=delivery_probabilities['probability_on_date']).set_title('Delivery Probability on Date (X)')\n", - "axes[0].set_ylabel('Probability')\n", - "axes[0].set_xlabel('Date')\n", + "sns.lineplot(ax=axes[0], data=delivery_probabilities[\"probability_on_date\"]).set_title(\n", + " \"Delivery 
Probability on Date (X)\"\n", + ")\n", + "axes[0].set_ylabel(\"Probability\")\n", + "axes[0].set_xlabel(\"Date\")\n", "\n", - "sns.lineplot(ax=axes[1], data=delivery_probabilities['probability_before_date']).set_title('Delivery Probability Before Date (X)')\n", - "axes[1].set_ylabel('Probability')\n", - "axes[1].set_xlabel('Date')\n", + "sns.lineplot(ax=axes[1], data=delivery_probabilities[\"probability_before_date\"]).set_title(\n", + " \"Delivery Probability Before Date (X)\"\n", + ")\n", + "axes[1].set_ylabel(\"Probability\")\n", + "axes[1].set_xlabel(\"Date\")\n", "\n", "plt.tight_layout()\n", "\n", diff --git a/examples/experiment_management/notebook.ipynb b/examples/experiment_management/notebook.ipynb index 510f30a08..438a30b62 100644 --- a/examples/experiment_management/notebook.ipynb +++ b/examples/experiment_management/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Exploring tracked experiments [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/experiment_management/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/experiment_management/notebook.ipynb)\n", + "# Exploring tracked experiments [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/experiment_management/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/experiment_management/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\experiment_management\\notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\experiment_management\\notebook.ipynb)\n", + "\n", "\n", "This notebook shows how to load data from your local repository once you executed `run.py` a few times. Note that you will need to update `run_id` values to match your data." ] @@ -29,7 +30,8 @@ "import pickle\n", "\n", "import pandas as pd\n", - "from IPython.display import display, Image\n", + "from IPython.display import Image, display\n", + "\n", "from hamilton.plugins.h_experiments.cache import JsonCache" ] }, @@ -589,12 +591,10 @@ } ], "source": [ - "from sklearn.linear_model import LinearRegression\n", - "\n", - "def load_artifact(materialize_info, strict = False):\n", + "def load_artifact(materialize_info, strict=False):\n", " sink = materialize_info[\"sink\"]\n", " path = materialize_info[\"path\"]\n", - " \n", + "\n", " if sink == \"pickle\":\n", " artifact = pickle.load(open(path, \"rb\"))\n", " elif sink == \"parquet\":\n", @@ -603,7 +603,7 @@ " if strict:\n", " raise TypeError(f\"Can't load artifact of type {sink}\")\n", " artifact = None\n", - " \n", + "\n", " return artifact\n", "\n", "\n", diff --git a/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb b/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb index 2f75af7b8..aeb8edade 100644 --- a/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb +++ b/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Feature Retrieval [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb)\n", + "# Feature Retrieval [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/feast/integration_feature_store/feature_repo/retrieval.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\feast\\integration_feature_store\\feature_repo\\retrieval.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\feast\\integration_feature_store\\feature_repo\\retrieval.ipynb)\n", + "\n", "\n", "This notebook shows the basic of feature retrieval using Feast. The `retrieval.ipynb` notebook in `/integration_feature_store/feature_repo` shows more involved usage." 
] @@ -29,6 +30,7 @@ "!python run.py\n", "\n", "from IPython.display import clear_output\n", + "\n", "clear_output() # clears the jupyter cell output" ] }, @@ -51,13 +53,13 @@ "source": [ "import datetime\n", "\n", + "import demo_inputs # hard-coded inputs for demo\n", "import feast\n", - "from hamilton import base, driver\n", - "\n", "import feature_transformations\n", "import store_definitions\n", "import store_operations # Feast operations as Hamilton DAG\n", - "import demo_inputs # hard-coded inputs for demo\n", + "\n", + "from hamilton import base, driver\n", "\n", "%load_ext autoreload\n", "%autoreload 1\n", @@ -81,7 +83,7 @@ " dict(),\n", " # feature_transformations,\n", " store_definitions,\n", - " adapter=base.SimplePythonGraphAdapter(base.DictResult())\n", + " adapter=base.SimplePythonGraphAdapter(base.DictResult()),\n", ")\n", "\n", "dr.display_all_functions(\n", @@ -122,8 +124,8 @@ "metadata": {}, "outputs": [], "source": [ - "# define a retrieval job for entities specified by id and timestamp \n", - "# `features` specifies columns to retrieve; can be feast.FeatureView, \n", + "# define a retrieval job for entities specified by id and timestamp\n", + "# `features` specifies columns to retrieve; can be feast.FeatureView,\n", "# feast.FeatureService, or column names as string\n", "job: feast.infra.offline_stores.offline_store.RetrievalJob = feature_store.get_historical_features(\n", " entity_df=demo_inputs.HISTORICAL_ENTITY_DF,\n", diff --git a/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb b/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb index 16ae419dd..59e4d18b6 100644 --- a/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb +++ b/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# MPG Simple [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb)\n" + "# MPG Simple [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimple.ipynb)\n" ] }, { @@ -34,8 +34,9 @@ }, "outputs": [], "source": [ - "from hamilton import driver\n", - "from IPython.display import HTML, display" + "from IPython.display import HTML, display\n", + "\n", + "from hamilton import driver" ] }, { @@ -160,8 +161,7 @@ "outputs": [], "source": [ "# Visualize Overrides\n", - "dr.visualize_execution([\"evaluated_model\"], \n", - " overrides={\"linear_model\": result[\"linear_model\"]})\n" + "dr.visualize_execution([\"evaluated_model\"], overrides={\"linear_model\": result[\"linear_model\"]})" ] }, { @@ -179,8 +179,7 @@ "outputs": [], "source": [ "# execute with overrides\n", - "dr.execute([\"evaluated_model\"], \n", - " overrides={\"linear_model\": result[\"linear_model\"]})" + "dr.execute([\"evaluated_model\"], 
overrides={\"linear_model\": result[\"linear_model\"]})" ] }, { diff --git a/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb b/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb index 048adfea4..2897fddd1 100644 --- a/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb +++ b/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# MPG Simple Advanced Target [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb)\n" + "# MPG Simple Advanced Target [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleAdvancedTarget.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\hamilton-tutorials\\mpg-translation\\MPGSimpleAdvancedTarget.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\hamilton-tutorials\\mpg-translation\\MPGSimpleAdvancedTarget.ipynb)\n" ] }, { @@ -49,8 +49,7 @@ } ], "source": [ - "from hamilton import driver\n", - "from IPython.display import HTML, display" + "from 
hamilton import driver" ] }, { @@ -822,8 +821,7 @@ ], "source": [ "# Visualize Overrides\n", - "dr.visualize_execution([\"evaluated_model\"], \n", - " overrides={\"linear_model\": result[\"linear_model\"]})" + "dr.visualize_execution([\"evaluated_model\"], overrides={\"linear_model\": result[\"linear_model\"]})" ] }, { diff --git a/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb b/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb index 45dbe2e5c..88ae2b3c1 100644 --- a/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb +++ b/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# MPG Simple Target [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb)\n" + "# MPG Simple Target [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/MPGSimpleTarget.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\hamilton-tutorials\\mpg-translation\\MPGSimpleTarget.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\hamilton-tutorials\\mpg-translation\\MPGSimpleTarget.ipynb)\n" ] }, { @@ -47,8 +47,7 @@ } ], "source": [ - "from hamilton import driver\n", - "from IPython.display import HTML, display" + "from hamilton import driver" ] }, { @@ -544,8 +543,7 @@ ], "source": [ "# Visualize Overrides\n", - "dr.visualize_execution([\"evaluated_model\"], \n", - " overrides={\"linear_model\": result[\"linear_model\"]})" + "dr.visualize_execution([\"evaluated_model\"], overrides={\"linear_model\": result[\"linear_model\"]})" ] }, { diff --git a/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb b/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb index 9b8900e52..fd319e498 100644 --- a/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb +++ b/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Procedural MPG Notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb)\n" + "# Procedural MPG Notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton-tutorials/mpg-translation/ProceduralMPGNotebook.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\hamilton-tutorials\\mpg-translation\\ProceduralMPGNotebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\hamilton-tutorials\\mpg-translation\\ProceduralMPGNotebook.ipynb)\n" ] }, { @@ -26,8 +26,8 @@ "import numpy as np\n", "import pandas as pd\n", "from sklearn.linear_model import LinearRegression\n", - "from sklearn.preprocessing import StandardScaler\n", - "from sklearn.metrics import mean_absolute_error\n" + "from sklearn.metrics import mean_absolute_error\n", + "from sklearn.preprocessing import StandardScaler" ] }, { @@ -117,7 +117,7 @@ "seed = 123\n", "# split the pandas dataframe into train and test\n", "train_dataset = raw_dataset.sample(frac=train_test_split, random_state=seed)\n", - "test_dataset = raw_dataset.drop(train_dataset.index)\n" + "test_dataset = raw_dataset.drop(train_dataset.index)" ] }, { @@ -206,9 +206,7 @@ "# Predict and evaluate the model\n", "test_pred = linear_model.predict(test_dataset_scaled)\n", "mae = mean_absolute_error(test_labels, test_pred)\n", - "test_results = {\n", - " \"linear_model\": mae\n", - "}" + "test_results = {\"linear_model\": mae}" ] }, { diff --git a/examples/hamilton_ui/notebook.ipynb b/examples/hamilton_ui/notebook.ipynb index a14664cfd..721de922c 100644 --- a/examples/hamilton_ui/notebook.ipynb +++ b/examples/hamilton_ui/notebook.ipynb @@ -16,7 +16,8 @@ "id": "a492fd8c", "metadata": {}, "source": [ - "# Running the DAG in your notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton_ui/notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton_ui/notebook.ipynb)\n", + "# Running the DAG in your notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hamilton_ui/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hamilton_ui/notebook.ipynb)\n", + "\n", "\n", "\n", "This notebook uses the existing module definitions to build the DAG and run it in this notebook."
@@ -35,11 +36,11 @@ "outputs": [], "source": [ "from components import feature_transforms, iris_loader, models\n", - "from hamilton_sdk import adapters\n", "\n", "from hamilton import driver as h_driver\n", "from hamilton.io.materialization import to\n", - "from hamilton.lifecycle import PrintLnHook" + "from hamilton.lifecycle import PrintLnHook\n", + "from hamilton_sdk import adapters" ] }, { @@ -1050,7 +1051,7 @@ "config = {\"case\": \"parquet\"}\n", "dag_name = \"machine_learning_dag\"\n", "email = \"elijah@dagworks.io\" # your email\n", - "project_id = 38 # your project id\n", + "project_id = 38 # your project id\n", "\n", "# create tracker object\n", "tracker = adapters.HamiltonTracker(\n", @@ -1090,10 +1091,10 @@ "inputs = {}\n", "outputs_to_materialize = [\n", " to.parquet(\n", - " id=\"data_set_v1_saver\",\n", - " path=\"data_set_v1.parquet\",\n", - " dependencies=[\"data_set_v1\"],\n", - " ),\n", + " id=\"data_set_v1_saver\",\n", + " path=\"data_set_v1.parquet\",\n", + " dependencies=[\"data_set_v1\"],\n", + " ),\n", " to.pickle(\n", " id=\"svm_model_saver\",\n", " path=\"svm_model.pkl\",\n", @@ -1103,7 +1104,7 @@ " id=\"lr_model_saver\",\n", " path=\"lr_model.pkl\",\n", " dependencies=[\"lr_model\"],\n", - " )\n", + " ),\n", "]" ] }, diff --git a/examples/hello_world/my_notebook.ipynb b/examples/hello_world/my_notebook.ipynb index 8e9edd954..f59a728a1 100644 --- a/examples/hello_world/my_notebook.ipynb +++ b/examples/hello_world/my_notebook.ipynb @@ -19,7 +19,8 @@ } }, "source": [ - "# Hello World Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hello_world/my_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hello_world/my_notebook.ipynb)\n", + "# Hello World Example [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/hello_world/my_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/hello_world/my_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\hello_world\\my_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\hello_world\\my_notebook.ipynb)\n", + "\n", "This uses the jupyter magic commands to create a simple example of how to use Hamilton." ] }, @@ -41,6 +42,7 @@ "# Cell 1 - import the things you need\n", "import logging\n", "import sys\n", + "\n", "from hamilton import driver\n", "\n", "%load_ext hamilton.plugins.jupyter_magic\n", @@ -762,7 +764,7 @@ ], "source": [ "# visualize just the execution path\n", - "dr.visualize_execution(output_columns) # no other args needed for jupyter" + "dr.visualize_execution(output_columns) # no other args needed for jupyter" ] }, { @@ -888,8 +890,9 @@ ], "source": [ "# visualize the path of execution between two functions\n", - "dr.visualize_path_between(\"spend_mean\", \"spend_zero_mean_unit_variance\",\n", - " strict_path_visualization=False)" + "dr.visualize_path_between(\n", + " \"spend_mean\", \"spend_zero_mean_unit_variance\", strict_path_visualization=False\n", + ")" ] }, { diff --git a/examples/jupyter_notebook_magic/example.ipynb b/examples/jupyter_notebook_magic/example.ipynb index 648ba23b8..531b82a29 100644 --- a/examples/jupyter_notebook_magic/example.ipynb +++ b/examples/jupyter_notebook_magic/example.ipynb @@ -25,7 +25,8 @@ "id": "52cb3087", "metadata": {}, "source": [ - "# Hamilton notebook extension [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/jupyter_notebook_magic/example.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/jupyter_notebook_magic/example.ipynb)\n", + "# Hamilton notebook extension [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/jupyter_notebook_magic/example.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/jupyter_notebook_magic/example.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\jupyter_notebook_magic\\example.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\jupyter_notebook_magic\\example.ipynb)\n", + "\n", "\n", "Jupyter magics are commands that can be executed in notebooks using `%` and `%%` in code cells.\n", "- **Line magics** start with `%` and apply to the current line\n", @@ -68,6 +69,7 @@ "source": [ "# disable plugin autoloading for faster notebook start time\n", "from hamilton import registry\n", + "\n", "registry.disable_autoload()\n", "\n", "%reload_ext hamilton.plugins.jupyter_magic\n", @@ -96,7 +98,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[0;31mDocstring:\u001B[0m\n", + "\u001b[0;31mDocstring:\u001b[0m\n", "::\n", "\n", " %cell_to_module [-m [MODULE_NAME]] [-d [DISPLAY]] [--display_cache]\n", @@ -150,7 +152,7 @@ " -w <[WRITE_TO_FILE]>, --write_to_file <[WRITE_TO_FILE]>\n", " Write cell content to a file. 
The argument is the file\n", " path; else write to {module_name}.py\n", - "\u001B[0;31mFile:\u001B[0m ~/projects/hamilton/hamilton/plugins/jupyter_magic.py" + "\u001b[0;31mFile:\u001b[0m ~/projects/hamilton/hamilton/plugins/jupyter_magic.py" ] } ], @@ -511,6 +513,7 @@ ], "source": [ "import joke\n", + "\n", "joke.joke_prompt(\"Cowsays\")" ] }, @@ -1877,6 +1880,7 @@ "outputs": [], "source": [ "from hamilton.lifecycle.default import PrintLn\n", + "\n", "my_builder = driver.Builder().with_adapters(PrintLn()) # add the adapter" ] }, diff --git a/examples/kedro/hamilton-code/notebooks/interactive.ipynb b/examples/kedro/hamilton-code/notebooks/interactive.ipynb index b77ab60f0..46581f6a9 100644 --- a/examples/kedro/hamilton-code/notebooks/interactive.ipynb +++ b/examples/kedro/hamilton-code/notebooks/interactive.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Hamilton notebook extension [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/kedro/hamilton-code/notebooks/interactive.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/kedro/hamilton-code/notebooks/interactive.ipynb)\n", + "# Hamilton notebook extension [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/kedro/hamilton-code/notebooks/interactive.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/kedro/hamilton-code/notebooks/interactive.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\kedro\\hamilton-code\\notebooks\\interactive.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\kedro\\hamilton-code\\notebooks\\interactive.ipynb)\n", + "\n", "\n", "This notebook allows to load Hamilton dataflow definition from files and edit them interactively. It uses [Hamilton Jupyter Magics](https://hamilton.apache.org/how-tos/use-in-jupyter-notebook/#use-hamilton-jupyter-magic)." ] @@ -34,6 +35,7 @@ "outputs": [], "source": [ "from hamilton import driver\n", + "\n", "%load_ext hamilton.plugins.jupyter_magic" ] }, diff --git a/examples/kedro/kedro-code/src/kedro_code/pipeline_registry.py b/examples/kedro/kedro-code/src/kedro_code/pipeline_registry.py index 54a1cc3ea..d3aa4d381 100644 --- a/examples/kedro/kedro-code/src/kedro_code/pipeline_registry.py +++ b/examples/kedro/kedro-code/src/kedro_code/pipeline_registry.py @@ -1,12 +1,10 @@ """Project pipelines.""" -from typing import Dict - from kedro.framework.project import find_pipelines from kedro.pipeline import Pipeline -def register_pipelines() -> Dict[str, Pipeline]: +def register_pipelines() -> dict[str, Pipeline]: """Register the project's pipelines. Returns: diff --git a/examples/kedro/kedro-code/src/kedro_code/pipelines/data_science/nodes.py b/examples/kedro/kedro-code/src/kedro_code/pipelines/data_science/nodes.py index ef921e3bd..3a1b1db41 100755 --- a/examples/kedro/kedro-code/src/kedro_code/pipelines/data_science/nodes.py +++ b/examples/kedro/kedro-code/src/kedro_code/pipelines/data_science/nodes.py @@ -1,5 +1,4 @@ import logging -from typing import Dict, Tuple import pandas as pd from sklearn.linear_model import LinearRegression @@ -7,7 +6,7 @@ from sklearn.model_selection import train_test_split -def split_data(data: pd.DataFrame, parameters: Dict) -> Tuple: +def split_data(data: pd.DataFrame, parameters: dict) -> tuple: """Splits data into features and targets training and test sets. 
Args: diff --git a/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb b/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb index bcb0e6e0a..5127a8c8d 100644 --- a/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb +++ b/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Kedro to Hamilton plugin [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb)\n", + "# Kedro to Hamilton plugin [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/kedro/kedro-plugin/kedro_to_hamilton.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\kedro\\kedro-plugin\\kedro_to_hamilton.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\kedro\\kedro-plugin\\kedro_to_hamilton.ipynb)\n", + "\n", "\n", "\n", "The plugin allows you to convert a Kedro `Pipeline` object in to a valid Hamilton `Driver`. 
This allows you to execute your Kedro pipeline on Hamilton and track execution using the [Hamilton UI](https://hamilton.apache.org/concepts/ui/), which provides rich observability and introspection features.\n", @@ -39,6 +40,7 @@ "source": [ "!pip install ../kedro-code\n", "import warnings\n", + "\n", "warnings.filterwarnings(\"ignore\")" ] }, @@ -171,9 +173,10 @@ } ], "source": [ - "from hamilton.plugins import h_kedro\n", "from kedro_code.pipelines import data_processing\n", "\n", + "from hamilton.plugins import h_kedro\n", + "\n", "dr = h_kedro.kedro_pipeline_to_driver(data_processing.create_pipeline())\n", "dr" ] @@ -428,9 +431,10 @@ } ], "source": [ - "from hamilton.plugins import h_kedro\n", "from kedro_code.pipelines import data_processing, data_science\n", "\n", + "from hamilton.plugins import h_kedro\n", + "\n", "dr = h_kedro.kedro_pipeline_to_driver(\n", " data_processing.create_pipeline(),\n", " data_science.create_pipeline(),\n", @@ -669,7 +673,7 @@ "import pandas as pd\n", "\n", "# loading stored data\n", - "inputs=dict(\n", + "inputs = dict(\n", " companies=pd.read_csv(\"../kedro-code/data/01_raw/companies.csv\"),\n", " reviews=pd.read_csv(\"../kedro-code/data/01_raw/reviews.csv\"),\n", " shuttles=pd.read_excel(\"../kedro-code/data/01_raw/shuttles.xlsx\"),\n", @@ -700,7 +704,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:10:54]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mcompanies\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=554145;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=747028;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m[05/22/24 
16:10:54]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mcompanies\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=554145;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=747028;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -713,7 +717,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mreviews\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=756925;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=7965;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mreviews\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=756925;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=7965;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -726,7 +730,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO 
\u001B[0m Loading data from \u001B[38;5;208mshuttles\u001B[0m \u001B[1m(\u001B[0mExcelDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=791772;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=315027;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mshuttles\u001b[0m \u001b[1m(\u001b[0mExcelDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=791772;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=315027;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -915,41 +919,41 @@ "text/plain": [ "\n", " shuttle_location shuttle_type engine_type engine_vendor \\\n", - "\u001B[1;36m0\u001B[0m Sao Tome and Principe Type V5 Plasma ThetaBase Services \n", - "\u001B[1;36m1\u001B[0m Wallis and Futuna Type V2 Plasma ThetaBase Services \n", - "\u001B[1;36m2\u001B[0m Niue Type F5 Quantum ThetaBase Services \n", - "\u001B[1;36m3\u001B[0m Malta Type V2 Quantum ThetaBase Services \n", - "\u001B[1;36m4\u001B[0m Malta Type V2 Plasma ThetaBase Services \n", + "\u001b[1;36m0\u001b[0m Sao Tome and Principe Type V5 Plasma ThetaBase Services \n", + "\u001b[1;36m1\u001b[0m Wallis and Futuna Type V2 Plasma ThetaBase Services \n", + "\u001b[1;36m2\u001b[0m Niue Type F5 Quantum ThetaBase Services \n", + "\u001b[1;36m3\u001b[0m Malta Type V2 Quantum ThetaBase Services \n", + 
"\u001b[1;36m4\u001b[0m Malta Type V2 Plasma ThetaBase Services \n", "\n", " engines passenger_capacity cancellation_policy crew d_check_complete \\\n", - "\u001B[1;36m0\u001B[0m \u001B[1;36m2.0\u001B[0m \u001B[1;36m4\u001B[0m moderate \u001B[1;36m2.0\u001B[0m \u001B[3;91mFalse\u001B[0m \n", - "\u001B[1;36m1\u001B[0m \u001B[1;36m3.0\u001B[0m \u001B[1;36m5\u001B[0m moderate \u001B[1;36m3.0\u001B[0m \u001B[3;91mFalse\u001B[0m \n", - "\u001B[1;36m2\u001B[0m \u001B[1;36m1.0\u001B[0m \u001B[1;36m2\u001B[0m strict \u001B[1;36m1.0\u001B[0m \u001B[3;92mTrue\u001B[0m \n", - "\u001B[1;36m3\u001B[0m \u001B[1;36m1.0\u001B[0m \u001B[1;36m2\u001B[0m moderate \u001B[1;36m1.0\u001B[0m \u001B[3;91mFalse\u001B[0m \n", - "\u001B[1;36m4\u001B[0m \u001B[1;36m5.0\u001B[0m \u001B[1;36m10\u001B[0m strict \u001B[1;36m5.0\u001B[0m \u001B[3;91mFalse\u001B[0m \n", + "\u001b[1;36m0\u001b[0m \u001b[1;36m2.0\u001b[0m \u001b[1;36m4\u001b[0m moderate \u001b[1;36m2.0\u001b[0m \u001b[3;91mFalse\u001b[0m \n", + "\u001b[1;36m1\u001b[0m \u001b[1;36m3.0\u001b[0m \u001b[1;36m5\u001b[0m moderate \u001b[1;36m3.0\u001b[0m \u001b[3;91mFalse\u001b[0m \n", + "\u001b[1;36m2\u001b[0m \u001b[1;36m1.0\u001b[0m \u001b[1;36m2\u001b[0m strict \u001b[1;36m1.0\u001b[0m \u001b[3;92mTrue\u001b[0m \n", + "\u001b[1;36m3\u001b[0m \u001b[1;36m1.0\u001b[0m \u001b[1;36m2\u001b[0m moderate \u001b[1;36m1.0\u001b[0m \u001b[3;91mFalse\u001b[0m \n", + "\u001b[1;36m4\u001b[0m \u001b[1;36m5.0\u001b[0m \u001b[1;36m10\u001b[0m strict \u001b[1;36m5.0\u001b[0m \u001b[3;91mFalse\u001b[0m \n", "\n", - " moon_clearance_complete \u001B[33m...\u001B[0m review_scores_crew review_scores_location \\\n", - "\u001B[1;36m0\u001B[0m \u001B[3;91mFalse\u001B[0m \u001B[33m...\u001B[0m \u001B[1;36m9.0\u001B[0m \u001B[1;36m9.0\u001B[0m \n", - "\u001B[1;36m1\u001B[0m \u001B[3;91mFalse\u001B[0m \u001B[33m...\u001B[0m \u001B[1;36m10.0\u001B[0m \u001B[1;36m10.0\u001B[0m \n", - "\u001B[1;36m2\u001B[0m \u001B[3;91mFalse\u001B[0m \u001B[33m...\u001B[0m 
\u001B[1;36m10.0\u001B[0m \u001B[1;36m10.0\u001B[0m \n", - "\u001B[1;36m3\u001B[0m \u001B[3;91mFalse\u001B[0m \u001B[33m...\u001B[0m \u001B[1;36m10.0\u001B[0m \u001B[1;36m9.0\u001B[0m \n", - "\u001B[1;36m4\u001B[0m \u001B[3;91mFalse\u001B[0m \u001B[33m...\u001B[0m \u001B[1;36m10.0\u001B[0m \u001B[1;36m9.0\u001B[0m \n", + " moon_clearance_complete \u001b[33m...\u001b[0m review_scores_crew review_scores_location \\\n", + "\u001b[1;36m0\u001b[0m \u001b[3;91mFalse\u001b[0m \u001b[33m...\u001b[0m \u001b[1;36m9.0\u001b[0m \u001b[1;36m9.0\u001b[0m \n", + "\u001b[1;36m1\u001b[0m \u001b[3;91mFalse\u001b[0m \u001b[33m...\u001b[0m \u001b[1;36m10.0\u001b[0m \u001b[1;36m10.0\u001b[0m \n", + "\u001b[1;36m2\u001b[0m \u001b[3;91mFalse\u001b[0m \u001b[33m...\u001b[0m \u001b[1;36m10.0\u001b[0m \u001b[1;36m10.0\u001b[0m \n", + "\u001b[1;36m3\u001b[0m \u001b[3;91mFalse\u001b[0m \u001b[33m...\u001b[0m \u001b[1;36m10.0\u001b[0m \u001b[1;36m9.0\u001b[0m \n", + "\u001b[1;36m4\u001b[0m \u001b[3;91mFalse\u001b[0m \u001b[33m...\u001b[0m \u001b[1;36m10.0\u001b[0m \u001b[1;36m9.0\u001b[0m \n", "\n", " review_scores_price number_of_reviews reviews_per_month id \\\n", - "\u001B[1;36m0\u001B[0m \u001B[1;36m9.0\u001B[0m \u001B[1;36m26\u001B[0m \u001B[1;36m0.77\u001B[0m \u001B[1;36m32413\u001B[0m \n", - "\u001B[1;36m1\u001B[0m \u001B[1;36m9.0\u001B[0m \u001B[1;36m61\u001B[0m \u001B[1;36m0.62\u001B[0m \u001B[1;36m14122\u001B[0m \n", - "\u001B[1;36m2\u001B[0m \u001B[1;36m10.0\u001B[0m \u001B[1;36m467\u001B[0m \u001B[1;36m4.66\u001B[0m \u001B[1;36m47761\u001B[0m \n", - "\u001B[1;36m3\u001B[0m \u001B[1;36m9.0\u001B[0m \u001B[1;36m318\u001B[0m \u001B[1;36m3.22\u001B[0m \u001B[1;36m26648\u001B[0m \n", - "\u001B[1;36m4\u001B[0m \u001B[1;36m10.0\u001B[0m \u001B[1;36m22\u001B[0m \u001B[1;36m0.29\u001B[0m \u001B[1;36m26648\u001B[0m \n", + "\u001b[1;36m0\u001b[0m \u001b[1;36m9.0\u001b[0m \u001b[1;36m26\u001b[0m \u001b[1;36m0.77\u001b[0m \u001b[1;36m32413\u001b[0m \n", + "\u001b[1;36m1\u001b[0m 
\u001b[1;36m9.0\u001b[0m \u001b[1;36m61\u001b[0m \u001b[1;36m0.62\u001b[0m \u001b[1;36m14122\u001b[0m \n", + "\u001b[1;36m2\u001b[0m \u001b[1;36m10.0\u001b[0m \u001b[1;36m467\u001b[0m \u001b[1;36m4.66\u001b[0m \u001b[1;36m47761\u001b[0m \n", + "\u001b[1;36m3\u001b[0m \u001b[1;36m9.0\u001b[0m \u001b[1;36m318\u001b[0m \u001b[1;36m3.22\u001b[0m \u001b[1;36m26648\u001b[0m \n", + "\u001b[1;36m4\u001b[0m \u001b[1;36m10.0\u001b[0m \u001b[1;36m22\u001b[0m \u001b[1;36m0.29\u001b[0m \u001b[1;36m26648\u001b[0m \n", "\n", " company_rating company_location total_fleet_count iata_approved \n", - "\u001B[1;36m0\u001B[0m \u001B[1;36m1.0\u001B[0m Faroe Islands \u001B[1;36m1.0\u001B[0m \u001B[3;91mFalse\u001B[0m \n", - "\u001B[1;36m1\u001B[0m \u001B[1;36m1.0\u001B[0m Malta \u001B[1;36m1.0\u001B[0m \u001B[3;92mTrue\u001B[0m \n", - "\u001B[1;36m2\u001B[0m \u001B[1;36m1.0\u001B[0m Niue \u001B[1;36m2.0\u001B[0m \u001B[3;91mFalse\u001B[0m \n", - "\u001B[1;36m3\u001B[0m \u001B[1;36m1.0\u001B[0m Niue \u001B[1;36m2.0\u001B[0m \u001B[3;92mTrue\u001B[0m \n", - "\u001B[1;36m4\u001B[0m \u001B[1;36m1.0\u001B[0m Niue \u001B[1;36m2.0\u001B[0m \u001B[3;92mTrue\u001B[0m \n", + "\u001b[1;36m0\u001b[0m \u001b[1;36m1.0\u001b[0m Faroe Islands \u001b[1;36m1.0\u001b[0m \u001b[3;91mFalse\u001b[0m \n", + "\u001b[1;36m1\u001b[0m \u001b[1;36m1.0\u001b[0m Malta \u001b[1;36m1.0\u001b[0m \u001b[3;92mTrue\u001b[0m \n", + "\u001b[1;36m2\u001b[0m \u001b[1;36m1.0\u001b[0m Niue \u001b[1;36m2.0\u001b[0m \u001b[3;91mFalse\u001b[0m \n", + "\u001b[1;36m3\u001b[0m \u001b[1;36m1.0\u001b[0m Niue \u001b[1;36m2.0\u001b[0m \u001b[3;92mTrue\u001b[0m \n", + "\u001b[1;36m4\u001b[0m \u001b[1;36m1.0\u001b[0m Niue \u001b[1;36m2.0\u001b[0m \u001b[3;92mTrue\u001b[0m \n", "\n", - "\u001B[1m[\u001B[0m\u001B[1;36m5\u001B[0m rows x \u001B[1;36m27\u001B[0m columns\u001B[1m]\u001B[0m" + "\u001b[1m[\u001b[0m\u001b[1;36m5\u001b[0m rows x \u001b[1;36m27\u001b[0m columns\u001b[1m]\u001b[0m" ] }, "execution_count": 5, @@ -1009,80 +1013,80 @@ 
"data": { "text/plain": [ "\n", - "\u001B[1m{\u001B[0m\n", - " \u001B[32m'model_input_table__parquet'\u001B[0m: \u001B[1m{\u001B[0m\n", - " \u001B[32m'file_metadata'\u001B[0m: \u001B[1m{\u001B[0m\n", - " \u001B[32m'size'\u001B[0m: \u001B[1;36m215150\u001B[0m,\n", - " \u001B[32m'path'\u001B[0m: \u001B[32m'../kedro-code/data/03_primary/model_input_table.pq'\u001B[0m,\n", - " \u001B[32m'last_modified'\u001B[0m: \u001B[1;36m1716408659.312288\u001B[0m,\n", - " \u001B[32m'timestamp'\u001B[0m: \u001B[1;36m1716423059.328785\u001B[0m,\n", - " \u001B[32m'scheme'\u001B[0m: \u001B[32m''\u001B[0m,\n", - " \u001B[32m'notes'\u001B[0m: \u001B[32m''\u001B[0m\n", - " \u001B[1m}\u001B[0m,\n", - " \u001B[32m'dataframe_metadata'\u001B[0m: \u001B[1m{\u001B[0m\n", - " \u001B[32m'rows'\u001B[0m: \u001B[1;36m6027\u001B[0m,\n", - " \u001B[32m'columns'\u001B[0m: \u001B[1;36m27\u001B[0m,\n", - " \u001B[32m'column_names'\u001B[0m: \u001B[1m[\u001B[0m\n", - " \u001B[32m'shuttle_location'\u001B[0m,\n", - " \u001B[32m'shuttle_type'\u001B[0m,\n", - " \u001B[32m'engine_type'\u001B[0m,\n", - " \u001B[32m'engine_vendor'\u001B[0m,\n", - " \u001B[32m'engines'\u001B[0m,\n", - " \u001B[32m'passenger_capacity'\u001B[0m,\n", - " \u001B[32m'cancellation_policy'\u001B[0m,\n", - " \u001B[32m'crew'\u001B[0m,\n", - " \u001B[32m'd_check_complete'\u001B[0m,\n", - " \u001B[32m'moon_clearance_complete'\u001B[0m,\n", - " \u001B[32m'price'\u001B[0m,\n", - " \u001B[32m'company_id'\u001B[0m,\n", - " \u001B[32m'shuttle_id'\u001B[0m,\n", - " \u001B[32m'review_scores_rating'\u001B[0m,\n", - " \u001B[32m'review_scores_comfort'\u001B[0m,\n", - " \u001B[32m'review_scores_amenities'\u001B[0m,\n", - " \u001B[32m'review_scores_trip'\u001B[0m,\n", - " \u001B[32m'review_scores_crew'\u001B[0m,\n", - " \u001B[32m'review_scores_location'\u001B[0m,\n", - " \u001B[32m'review_scores_price'\u001B[0m,\n", - " \u001B[32m'number_of_reviews'\u001B[0m,\n", - " \u001B[32m'reviews_per_month'\u001B[0m,\n", - " \u001B[32m'id'\u001B[0m,\n", - " 
\u001B[32m'company_rating'\u001B[0m,\n", - " \u001B[32m'company_location'\u001B[0m,\n", - " \u001B[32m'total_fleet_count'\u001B[0m,\n", - " \u001B[32m'iata_approved'\u001B[0m\n", - " \u001B[1m]\u001B[0m,\n", - " \u001B[32m'datatypes'\u001B[0m: \u001B[1m[\u001B[0m\n", - " \u001B[32m'string'\u001B[0m,\n", - " \u001B[32m'string'\u001B[0m,\n", - " \u001B[32m'string'\u001B[0m,\n", - " \u001B[32m'string'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'string'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'boolean'\u001B[0m,\n", - " \u001B[32m'boolean'\u001B[0m,\n", - " \u001B[32m'float64'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'Int64'\u001B[0m,\n", - " \u001B[32m'float64'\u001B[0m,\n", - " \u001B[32m'string'\u001B[0m,\n", - " \u001B[32m'Float64'\u001B[0m,\n", - " \u001B[32m'boolean'\u001B[0m\n", - " \u001B[1m]\u001B[0m\n", - " \u001B[1m}\u001B[0m\n", - " \u001B[1m}\u001B[0m\n", - "\u001B[1m}\u001B[0m" + "\u001b[1m{\u001b[0m\n", + " \u001b[32m'model_input_table__parquet'\u001b[0m: \u001b[1m{\u001b[0m\n", + " \u001b[32m'file_metadata'\u001b[0m: \u001b[1m{\u001b[0m\n", + " \u001b[32m'size'\u001b[0m: \u001b[1;36m215150\u001b[0m,\n", + " \u001b[32m'path'\u001b[0m: \u001b[32m'../kedro-code/data/03_primary/model_input_table.pq'\u001b[0m,\n", + " \u001b[32m'last_modified'\u001b[0m: \u001b[1;36m1716408659.312288\u001b[0m,\n", + " \u001b[32m'timestamp'\u001b[0m: \u001b[1;36m1716423059.328785\u001b[0m,\n", + " \u001b[32m'scheme'\u001b[0m: \u001b[32m''\u001b[0m,\n", + " \u001b[32m'notes'\u001b[0m: 
\u001b[32m''\u001b[0m\n", + " \u001b[1m}\u001b[0m,\n", + " \u001b[32m'dataframe_metadata'\u001b[0m: \u001b[1m{\u001b[0m\n", + " \u001b[32m'rows'\u001b[0m: \u001b[1;36m6027\u001b[0m,\n", + " \u001b[32m'columns'\u001b[0m: \u001b[1;36m27\u001b[0m,\n", + " \u001b[32m'column_names'\u001b[0m: \u001b[1m[\u001b[0m\n", + " \u001b[32m'shuttle_location'\u001b[0m,\n", + " \u001b[32m'shuttle_type'\u001b[0m,\n", + " \u001b[32m'engine_type'\u001b[0m,\n", + " \u001b[32m'engine_vendor'\u001b[0m,\n", + " \u001b[32m'engines'\u001b[0m,\n", + " \u001b[32m'passenger_capacity'\u001b[0m,\n", + " \u001b[32m'cancellation_policy'\u001b[0m,\n", + " \u001b[32m'crew'\u001b[0m,\n", + " \u001b[32m'd_check_complete'\u001b[0m,\n", + " \u001b[32m'moon_clearance_complete'\u001b[0m,\n", + " \u001b[32m'price'\u001b[0m,\n", + " \u001b[32m'company_id'\u001b[0m,\n", + " \u001b[32m'shuttle_id'\u001b[0m,\n", + " \u001b[32m'review_scores_rating'\u001b[0m,\n", + " \u001b[32m'review_scores_comfort'\u001b[0m,\n", + " \u001b[32m'review_scores_amenities'\u001b[0m,\n", + " \u001b[32m'review_scores_trip'\u001b[0m,\n", + " \u001b[32m'review_scores_crew'\u001b[0m,\n", + " \u001b[32m'review_scores_location'\u001b[0m,\n", + " \u001b[32m'review_scores_price'\u001b[0m,\n", + " \u001b[32m'number_of_reviews'\u001b[0m,\n", + " \u001b[32m'reviews_per_month'\u001b[0m,\n", + " \u001b[32m'id'\u001b[0m,\n", + " \u001b[32m'company_rating'\u001b[0m,\n", + " \u001b[32m'company_location'\u001b[0m,\n", + " \u001b[32m'total_fleet_count'\u001b[0m,\n", + " \u001b[32m'iata_approved'\u001b[0m\n", + " \u001b[1m]\u001b[0m,\n", + " \u001b[32m'datatypes'\u001b[0m: \u001b[1m[\u001b[0m\n", + " \u001b[32m'string'\u001b[0m,\n", + " \u001b[32m'string'\u001b[0m,\n", + " \u001b[32m'string'\u001b[0m,\n", + " \u001b[32m'string'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'string'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'boolean'\u001b[0m,\n", + " 
\u001b[32m'boolean'\u001b[0m,\n", + " \u001b[32m'float64'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'Int64'\u001b[0m,\n", + " \u001b[32m'float64'\u001b[0m,\n", + " \u001b[32m'string'\u001b[0m,\n", + " \u001b[32m'Float64'\u001b[0m,\n", + " \u001b[32m'boolean'\u001b[0m\n", + " \u001b[1m]\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + " \u001b[1m}\u001b[0m\n", + "\u001b[1m}\u001b[0m" ] }, "execution_count": 6, @@ -1099,11 +1103,11 @@ " from_.csv(\n", " target=\"companies\",\n", " path=\"../kedro-code/data/01_raw/companies.csv\",\n", - " ), \n", + " ),\n", " from_.csv(\n", " target=\"reviews\",\n", " path=\"../kedro-code/data/01_raw/reviews.csv\",\n", - " ), \n", + " ),\n", " from_.excel(\n", " target=\"shuttles\",\n", " path=\"../kedro-code/data/01_raw/shuttles.xlsx\",\n", @@ -1113,7 +1117,7 @@ " id=\"model_input_table__parquet\",\n", " dependencies=[\"model_input_table\"],\n", " path=\"../kedro-code/data/03_primary/model_input_table.pq\",\n", - " )\n", + " ),\n", "]\n", "\n", "# `.materialize()` will load data using `from_` objects and store results of `to` objects\n", @@ -1143,7 +1147,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:10:59]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mshuttles\u001B[0m \u001B[1m(\u001B[0mExcelDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m 
\u001B]8;id=251889;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=569579;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m[05/22/24 16:10:59]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mshuttles\u001b[0m \u001b[1m(\u001b[0mExcelDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=251889;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=569579;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1156,7 +1160,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:11:01]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mcompanies\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=698982;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=247154;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m[05/22/24 16:11:01]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mcompanies\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m 
\u001b]8;id=698982;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=247154;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1169,7 +1173,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mreviews\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=651853;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=754005;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mreviews\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=651853;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=754005;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1182,7 +1186,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:11:02]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Saving data to \u001B[38;5;208mmodel_input_table\u001B[0m \u001B[1m(\u001B[0mParquetDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m 
\u001B]8;id=845498;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=59594;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#525\u001B\\\u001B[2m525\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m[05/22/24 16:11:02]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Saving data to \u001b[38;5;208mmodel_input_table\u001b[0m \u001b[1m(\u001b[0mParquetDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=845498;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=59594;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#525\u001b\\\u001b[2m525\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1201,7 +1205,7 @@ { "data": { "text/plain": [ - "\u001B[1m{\u001B[0m\u001B[32m'model_input_table__parquet'\u001B[0m: \u001B[1m{\u001B[0m\u001B[32m'success'\u001B[0m: \u001B[3;92mTrue\u001B[0m\u001B[1m}\u001B[0m\u001B[1m}\u001B[0m" + "\u001b[1m{\u001b[0m\u001b[32m'model_input_table__parquet'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'success'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m" ] }, "execution_count": 7, @@ -1264,7 +1268,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mshuttles\u001B[0m \u001B[1m(\u001B[0mExcelDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m 
\u001B]8;id=765638;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=540969;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mshuttles\u001b[0m \u001b[1m(\u001b[0mExcelDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=765638;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=540969;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1277,7 +1281,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:11:04]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mcompanies\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=845539;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=373818;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m[05/22/24 16:11:04]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mcompanies\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m 
\u001b]8;id=845539;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=373818;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1290,7 +1294,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mreviews\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=673966;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=380648;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mreviews\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=673966;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=380648;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1303,7 +1307,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Saving data to \u001B[38;5;208mmodel_input_table\u001B[0m \u001B[1m(\u001B[0mParquetDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m 
\u001B]8;id=611834;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=912002;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#525\u001B\\\u001B[2m525\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Saving data to \u001b[38;5;208mmodel_input_table\u001b[0m \u001b[1m(\u001b[0mParquetDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=611834;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=912002;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#525\u001b\\\u001b[2m525\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1322,7 +1326,7 @@ { "data": { "text/plain": [ - "\u001B[1m{\u001B[0m\u001B[32m'model_input_table__parquet'\u001B[0m: \u001B[1m{\u001B[0m\u001B[32m'success'\u001B[0m: \u001B[3;92mTrue\u001B[0m\u001B[1m}\u001B[0m\u001B[1m}\u001B[0m" + "\u001b[1m{\u001b[0m\u001b[32m'model_input_table__parquet'\u001b[0m: \u001b[1m{\u001b[0m\u001b[32m'success'\u001b[0m: \u001b[3;92mTrue\u001b[0m\u001b[1m}\u001b[0m\u001b[1m}\u001b[0m" ] }, "execution_count": 8, @@ -1338,12 +1342,16 @@ " ParquetDataset,\n", ")\n", "\n", - "catalog = DataCatalog(dict(\n", - " companies=CSVDataset(filepath=\"../kedro-code/data/01_raw/companies.csv\"),\n", - " reviews=CSVDataset(filepath=\"../kedro-code/data/01_raw/reviews.csv\"),\n", - " shuttles=ExcelDataset(filepath=\"../kedro-code/data/01_raw/shuttles.xlsx\"),\n", - " model_input_table=ParquetDataset(filepath=\"../kedro-code/data/03_primary/model_input_table.pq\")\n", - "))\n", + "catalog = DataCatalog(\n", + " dict(\n", + " 
companies=CSVDataset(filepath=\"../kedro-code/data/01_raw/companies.csv\"),\n", + " reviews=CSVDataset(filepath=\"../kedro-code/data/01_raw/reviews.csv\"),\n", + " shuttles=ExcelDataset(filepath=\"../kedro-code/data/01_raw/shuttles.xlsx\"),\n", + " model_input_table=ParquetDataset(\n", + " filepath=\"../kedro-code/data/03_primary/model_input_table.pq\"\n", + " ),\n", + " )\n", + ")\n", "\n", "materializers = [\n", " from_.kedro(\n", @@ -1643,7 +1651,7 @@ "\n" ], "text/plain": [ - "\u001B[1m<\u001B[0m\u001B[1;95mhamilton.driver.Driver\u001B[0m\u001B[39m object at \u001B[0m\u001B[1;36m0x7f267841b450\u001B[0m\u001B[1m>\u001B[0m" + "\u001b[1m<\u001b[0m\u001b[1;95mhamilton.driver.Driver\u001b[0m\u001b[39m object at \u001b[0m\u001b[1;36m0x7f267841b450\u001b[0m\u001b[1m>\u001b[0m" ] }, "execution_count": 10, @@ -1652,10 +1660,11 @@ } ], "source": [ - "from hamilton_sdk.adapters import HamiltonTracker\n", + "from kedro_code.pipelines import data_processing, data_science\n", + "\n", "from hamilton import driver\n", "from hamilton.plugins import h_kedro\n", - "from kedro_code.pipelines import data_processing, data_science\n", + "from hamilton_sdk.adapters import HamiltonTracker\n", "\n", "# modify this as needed\n", "tracker = HamiltonTracker(\n", @@ -1666,9 +1675,7 @@ "builder = driver.Builder().with_adapters(tracker)\n", "\n", "dr = h_kedro.kedro_pipeline_to_driver(\n", - " data_processing.create_pipeline(),\n", - " data_science.create_pipeline(),\n", - " builder=builder\n", + " data_processing.create_pipeline(), data_science.create_pipeline(), builder=builder\n", ")\n", "dr" ] @@ -1695,10 +1702,10 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:11:10]\u001B[0m\u001B[2;36m \u001B[0m\u001B[31mWARNING \u001B[0m 
\u001B]8;id=299905;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py\u001B\\\u001B[2madapters.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=155714;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py#163\u001B\\\u001B[2m163\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m Capturing execution run. Results can be found at \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[4;94mhttp://localhost:8242/dashboard/project/3/runs/32\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[2m \u001B[0m\n" + "\u001b[2;36m[05/22/24 16:11:10]\u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m \u001b]8;id=299905;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py\u001b\\\u001b[2madapters.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=155714;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py#163\u001b\\\u001b[2m163\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m Capturing execution run. 
Results can be found at \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;94mhttp://localhost:8242/dashboard/project/3/runs/32\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[2m \u001b[0m\n" ] }, "metadata": {}, @@ -1711,7 +1718,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mshuttles\u001B[0m \u001B[1m(\u001B[0mExcelDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=976064;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=997503;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mshuttles\u001b[0m \u001b[1m(\u001b[0mExcelDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=976064;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=997503;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1724,7 +1731,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m[05/22/24 16:11:13]\u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mcompanies\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m 
\u001B]8;id=134319;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=446665;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m[05/22/24 16:11:13]\u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mcompanies\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=134319;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=446665;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1737,7 +1744,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Loading data from \u001B[38;5;208mreviews\u001B[0m \u001B[1m(\u001B[0mCSVDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=620527;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=424729;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001B\\\u001B[2m483\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Loading data from \u001b[38;5;208mreviews\u001b[0m \u001b[1m(\u001b[0mCSVDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m 
\u001b]8;id=620527;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=424729;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#483\u001b\\\u001b[2m483\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1750,7 +1757,7 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[34mINFO \u001B[0m Saving data to \u001B[38;5;208mmodel_input_table\u001B[0m \u001B[1m(\u001B[0mParquetDataset\u001B[1m)\u001B[0m\u001B[33m...\u001B[0m \u001B]8;id=102118;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001B\\\u001B[2mdata_catalog.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=182015;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#525\u001B\\\u001B[2m525\u001B[0m\u001B]8;;\u001B\\\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[34mINFO \u001b[0m Saving data to \u001b[38;5;208mmodel_input_table\u001b[0m \u001b[1m(\u001b[0mParquetDataset\u001b[1m)\u001b[0m\u001b[33m...\u001b[0m \u001b]8;id=102118;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py\u001b\\\u001b[2mdata_catalog.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=182015;file:///home/tjean/projects/dagworks/hamilton/examples/kedro/venv/lib/python3.11/site-packages/kedro/io/data_catalog.py#525\u001b\\\u001b[2m525\u001b[0m\u001b]8;;\u001b\\\n" ] }, "metadata": {}, @@ -1766,10 +1773,10 @@ "\n" ], "text/plain": [ - "\u001B[2;36m \u001B[0m\u001B[2;36m \u001B[0m\u001B[31mWARNING \u001B[0m 
\u001B]8;id=48567;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py\u001B\\\u001B[2madapters.py\u001B[0m\u001B]8;;\u001B\\\u001B[2m:\u001B[0m\u001B]8;id=639017;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py#352\u001B\\\u001B[2m352\u001B[0m\u001B]8;;\u001B\\\n", - "\u001B[2;36m \u001B[0m Captured execution run. Results can be found at \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[4;94mhttp://localhost:8242/dashboard/project/3/runs/32\u001B[0m \u001B[2m \u001B[0m\n", - "\u001B[2;36m \u001B[0m \u001B[2m \u001B[0m\n" + "\u001b[2;36m \u001b[0m\u001b[2;36m \u001b[0m\u001b[31mWARNING \u001b[0m \u001b]8;id=48567;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py\u001b\\\u001b[2madapters.py\u001b[0m\u001b]8;;\u001b\\\u001b[2m:\u001b[0m\u001b]8;id=639017;file:///home/tjean/projects/dagworks/hamilton/ui/sdk/src/hamilton_sdk/adapters.py#352\u001b\\\u001b[2m352\u001b[0m\u001b]8;;\u001b\\\n", + "\u001b[2;36m \u001b[0m Captured execution run. 
Results can be found at \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[4;94mhttp://localhost:8242/dashboard/project/3/runs/32\u001b[0m \u001b[2m \u001b[0m\n", + "\u001b[2;36m \u001b[0m \u001b[2m \u001b[0m\n" ] }, "metadata": {}, diff --git a/examples/lineage/lineage_snippets.ipynb b/examples/lineage/lineage_snippets.ipynb index 9e5947f6d..a98f7763f 100644 --- a/examples/lineage/lineage_snippets.ipynb +++ b/examples/lineage/lineage_snippets.ipynb @@ -16,7 +16,8 @@ "collapsed": false }, "source": [ - "# Lineage Code Snippets [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/lineage/lineage_snippets.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/lineage/lineage_snippets.ipynb)\n", + "# Lineage Code Snippets [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/lineage/lineage_snippets.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/lineage/lineage_snippets.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\lineage\\lineage_snippets.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\lineage\\lineage_snippets.ipynb)\n", + "\n", "\n", "Here is a notebook that shows you code snippets that you can use for lineage purposes. It uses the Hamilton code operating over the Titanic dataset for demonstration purposes." 
] @@ -33,14 +34,13 @@ }, "outputs": [], "source": [ - "from hamilton import base\n", - "from hamilton import driver\n", - "\n", "# modules that house the Hamilton code we build the lineage/DAG from.\n", "import data_loading\n", "import features\n", "import model_pipeline\n", - "import sets\n" + "import sets\n", + "\n", + "from hamilton import base, driver" ] }, { @@ -67,7 +67,7 @@ "config = {} # This example has no configuration that changes the DAG/lineage shape.\n", "# instantiate the driver\n", "adapter = base.DefaultAdapter()\n", - "dr = driver.Driver(config, data_loading, features, sets, model_pipeline, adapter=adapter)\n" + "dr = driver.Driver(config, data_loading, features, sets, model_pipeline, adapter=adapter)" ] }, { @@ -893,7 +893,10 @@ " \"validation_size_fraction\": 0.33,\n", "}\n", "dr.visualize_execution(\n", - " [features.encoders], None, {}, inputs=inputs # pass in a path if you want to save the image.\n", + " [features.encoders],\n", + " None,\n", + " {},\n", + " inputs=inputs, # pass in a path if you want to save the image.\n", ")" ] }, @@ -941,11 +944,13 @@ " # filter to nodes that we're interested in getting information about\n", " if node.tags.get(\"source\"):\n", " # append for output\n", - " teams.append({\n", - " \"team\": node.tags.get(\"owner\"),\n", - " \"function\": node.name,\n", - " \"source\": node.tags.get(\"source\"),\n", - " })\n", + " teams.append(\n", + " {\n", + " \"team\": node.tags.get(\"owner\"),\n", + " \"function\": node.name,\n", + " \"source\": node.tags.get(\"source\"),\n", + " }\n", + " )\n", "teams" ] }, @@ -995,11 +1000,13 @@ " # if it's an artifact function\n", " if node.tags.get(\"artifact\"):\n", " # pull out the information we want\n", - " artifacts.append({\n", - " \"team\": node.tags.get(\"owner\"),\n", - " \"function\": node.name,\n", - " \"artifact\": node.tags.get(\"artifact\"),\n", - " })\n", + " artifacts.append(\n", + " {\n", + " \"team\": node.tags.get(\"owner\"),\n", + " \"function\": node.name,\n", + " 
\"artifact\": node.tags.get(\"artifact\"),\n", + " }\n", + " )\n", "artifacts" ] }, @@ -1049,8 +1056,7 @@ } ], "source": [ - "pii_nodes = [n for n in dr.list_available_variables()\n", - " if n.tags.get(\"PII\") == \"true\"]\n", + "pii_nodes = [n for n in dr.list_available_variables() if n.tags.get(\"PII\") == \"true\"]\n", "pii_to_artifacts = {}\n", "# loop through each PII node\n", "for node in pii_nodes:\n", @@ -1061,11 +1067,13 @@ " # Filter to nodes of interest\n", " if dwn_node.tags.get(\"artifact\"):\n", " # pull out information\n", - " pii_to_artifacts[node.name].append({\n", - " \"team\": dwn_node.tags.get(\"owner\"),\n", - " \"function\": dwn_node.name,\n", - " \"artifact\": dwn_node.tags.get(\"artifact\"),\n", - " })\n", + " pii_to_artifacts[node.name].append(\n", + " {\n", + " \"team\": dwn_node.tags.get(\"owner\"),\n", + " \"function\": dwn_node.name,\n", + " \"artifact\": dwn_node.tags.get(\"artifact\"),\n", + " }\n", + " )\n", "pii_to_artifacts" ] }, @@ -1376,7 +1384,9 @@ ], "source": [ "# Visualize a particular path:\n", - "dr.visualize_path_between(\"age\", \"fit_random_forest\") # pass in a path if you want to save the image.\n" + "dr.visualize_path_between(\n", + " \"age\", \"fit_random_forest\"\n", + ") # pass in a path if you want to save the image." ] }, { @@ -1506,7 +1516,9 @@ ], "source": [ "# If we want to just focus on the path we can do:\n", - "dr.visualize_path_between(\"age\", \"fit_random_forest\", strict_path_visualization=True) # pass in values if you want to save the image.\n" + "dr.visualize_path_between(\n", + " \"age\", \"fit_random_forest\", strict_path_visualization=True\n", + ") # pass in values if you want to save the image." 
] }, { diff --git a/examples/materialization/datasaver_dataloader_example/notebook.ipynb b/examples/materialization/datasaver_dataloader_example/notebook.ipynb index 3d9eaaeb9..08aa9aade 100644 --- a/examples/materialization/datasaver_dataloader_example/notebook.ipynb +++ b/examples/materialization/datasaver_dataloader_example/notebook.ipynb @@ -16,7 +16,7 @@ "id": "29b68528", "metadata": {}, "source": [ - "# Materialization example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/materialization/datasaver_dataloader_example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/materialization/datasaver_dataloader_example/notebook.ipynb)\n" + "# Materialization example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/materialization/datasaver_dataloader_example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/materialization/datasaver_dataloader_example/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\materialization\\datasaver_dataloader_example\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\materialization\\datasaver_dataloader_example\\notebook.ipynb)\n" ] }, { @@ -308,13 +308,12 @@ } ], "source": [ - "from hamilton_sdk import adapters\n", - "\n", "from hamilton import driver\n", + "from hamilton_sdk import adapters\n", "\n", "tracker = adapters.HamiltonTracker(\n", " project_id=7, # modify this as 
needed\n", - " username=\"elijah@dagworks.io\", # modify this as needed\n", + " username=\"elijah@dagworks.io\", # modify this as needed\n", " dag_name=\"my_version_of_the_dag\",\n", " tags={\"environment\": \"DEV\", \"team\": \"MY_TEAM\", \"version\": \"X\"},\n", ")\n", diff --git a/examples/materialization/notebook.ipynb b/examples/materialization/notebook.ipynb index d1d117f09..311d27a8c 100644 --- a/examples/materialization/notebook.ipynb +++ b/examples/materialization/notebook.ipynb @@ -16,7 +16,7 @@ "id": "97f4cc58", "metadata": {}, "source": [ - "# Materialization [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/materialization/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/materialization/notebook.ipynb)\n" + "# Materialization [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/materialization/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/materialization/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\materialization\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\materialization\\notebook.ipynb)\n" ] }, { @@ -40,17 +40,11 @@ } ], "source": [ - "import json\n", - "import os\n", - "\n", "import data_loaders\n", "import model_training\n", "\n", "from hamilton import base, driver\n", - "from hamilton.io.materialization import to\n", - "import pandas as pd\n", - "\n", - "import custom_materializers" 
+ "from hamilton.io.materialization import to" ] }, { @@ -76,17 +70,17 @@ "dag_config = {\n", " \"test_size_fraction\": 0.5,\n", " \"shuffle_train_test_split\": True,\n", - " \"data_loader\" : \"iris\",\n", - " \"clf\" : \"logistic\",\n", - " \"penalty\" : \"l2\"\n", + " \"data_loader\": \"iris\",\n", + " \"clf\": \"logistic\",\n", + " \"penalty\": \"l2\",\n", "}\n", "dr = (\n", - " driver.Builder()\n", - " .with_adapter(base.DefaultAdapter())\n", - " .with_config(dag_config)\n", - " .with_modules(data_loaders, model_training)\n", - " .build()\n", - " )" + " driver.Builder()\n", + " .with_adapter(base.DefaultAdapter())\n", + " .with_config(dag_config)\n", + " .with_modules(data_loaders, model_training)\n", + " .build()\n", + ")" ] }, { @@ -433,28 +427,24 @@ ], "source": [ "materializers = [\n", - " to.json(\n", - " dependencies=[\"model_parameters\"],\n", - " id=\"model_params_to_json\",\n", - " path=\"./data/params.json\"\n", - " ),\n", - " # classification report to .txt file\n", - " to.file(\n", - " dependencies=[\"classification_report\"],\n", - " id=\"classification_report_to_txt\",\n", - " path=\"./data/classification_report.txt\",\n", - " ),\n", - " # materialize the model to a pickle file\n", - " to.pickle(\n", - " dependencies=[\"fit_clf\"], id=\"clf_to_pickle\", path=\"./data/clf.pkl\"\n", - " ),\n", - " # materialize the predictions we made to a csv file\n", - " to.csv(\n", - " dependencies=[\"predicted_output_with_labels\"],\n", - " id=\"predicted_output_with_labels_to_csv\",\n", - " path=\"./data/predicted_output_with_labels.csv\",\n", - " ),\n", - " ]\n", + " to.json(\n", + " dependencies=[\"model_parameters\"], id=\"model_params_to_json\", path=\"./data/params.json\"\n", + " ),\n", + " # classification report to .txt file\n", + " to.file(\n", + " dependencies=[\"classification_report\"],\n", + " id=\"classification_report_to_txt\",\n", + " path=\"./data/classification_report.txt\",\n", + " ),\n", + " # materialize the model to a pickle file\n", + " 
to.pickle(dependencies=[\"fit_clf\"], id=\"clf_to_pickle\", path=\"./data/clf.pkl\"),\n", + " # materialize the predictions we made to a csv file\n", + " to.csv(\n", + " dependencies=[\"predicted_output_with_labels\"],\n", + " id=\"predicted_output_with_labels_to_csv\",\n", + " path=\"./data/predicted_output_with_labels.csv\",\n", + " ),\n", + "]\n", "\n", "dr.visualize_materialization(\n", " *materializers,\n", @@ -470,9 +460,9 @@ "outputs": [], "source": [ "materialization_results, additional_vars = dr.materialize(\n", - " # materialize model parameters to json\n", - " *materializers,\n", - " additional_vars=[\"classification_report\"],\n", + " # materialize model parameters to json\n", + " *materializers,\n", + " additional_vars=[\"classification_report\"],\n", ")" ] }, @@ -507,7 +497,7 @@ } ], "source": [ - "print(additional_vars['classification_report'])" + "print(additional_vars[\"classification_report\"])" ] }, { @@ -541,7 +531,7 @@ } ], "source": [ - "print(open((materialization_results['classification_report_to_txt']['path'])).read())" + "print(open((materialization_results[\"classification_report_to_txt\"][\"path\"])).read())" ] } ], diff --git a/examples/mlflow/tutorial.ipynb b/examples/mlflow/tutorial.ipynb index 0b06ddb04..40b281526 100644 --- a/examples/mlflow/tutorial.ipynb +++ b/examples/mlflow/tutorial.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# MLFlow plugin tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/mlflow/tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/mlflow/tutorial.ipynb)\n", + "# MLFlow plugin tutorial [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/mlflow/tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/mlflow/tutorial.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/mlflow/tutorial.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/mlflow/tutorial.ipynb)\n", + "\n", "\n", "This notebook shows to use the MLFlow plugin for Hamilton. The first three sections present minimal examples to introduce the core functionalities:\n", "1. Training and saving a model with `MLFlowModelSaver`\n", @@ -311,12 +312,7 @@ " register_as=\"my_predictor\", # name of the model in the MLFlow registry\n", ")\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(model_training)\n", - " .with_materializers(model_saver)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(model_training).with_materializers(model_saver).build()\n", "dr" ] }, @@ -353,7 +349,6 @@ ], "source": [ "# see the full API\n", - "from hamilton.plugins.mlflow_extensions import MLFlowModelSaver\n", "MLFlowModelSaver?" ] }, @@ -801,12 +796,7 @@ " version=1,\n", ")\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(model_inference)\n", - " .with_materializers(model_loader)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(model_inference).with_materializers(model_loader).build()\n", "dr" ] }, @@ -852,7 +842,6 @@ ], "source": [ "# see the full API\n", - "from hamilton.plugins.mlflow_extensions import MLFlowModelLoader\n", "MLFlowModelLoader?"
] }, @@ -1526,7 +1515,7 @@ "source": [ "results = dr.execute(\n", " [\"trained_model__mlflow\", \"train_performance\", \"test_performance\", \"test_scatter_plot\"],\n", - " inputs=dict(test_size_fraction=0.3)\n", + " inputs=dict(test_size_fraction=0.3),\n", ")" ] }, diff --git a/examples/model_examples/modular_example/notebook.ipynb b/examples/model_examples/modular_example/notebook.ipynb index c3c3eef07..67fcc1bb2 100644 --- a/examples/model_examples/modular_example/notebook.ipynb +++ b/examples/model_examples/modular_example/notebook.ipynb @@ -1,31 +1,36 @@ { "cells": [ { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, - "source": "%pip install sf-hamilton[visualization]", - "id": "6b12abc0bf96a1fa" + "id": "6b12abc0bf96a1fa", + "metadata": {}, + "outputs": [], + "source": [ + "%pip install sf-hamilton[visualization]" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "5fdf2bac7ddc6f79", + "metadata": {}, "source": [ - "# Modular Pipeline Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb)\n", + "# Modular Pipeline Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/modular_example/notebook.ipynb)\n", + "\n", "This uses the jupyter magic commands to create a simple example of how to reuse pipelines in a modular manner with subdag. " - ], - "id": "5fdf2bac7ddc6f79" + ] }, { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", "metadata": { - "collapsed": true, "ExecuteTime": { "end_time": "2024-12-07T06:57:19.359572Z", "start_time": "2024-12-07T06:57:13.119759Z" - } + }, + "collapsed": true }, - "cell_type": "code", "outputs": [ { "name": "stderr", @@ -36,44 +41,97 @@ ] } ], - "execution_count": 1, - "source": "%load_ext hamilton.plugins.jupyter_magic", - "id": "initial_id" + "source": [ + "%load_ext hamilton.plugins.jupyter_magic" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "29ebd0ec7fc5b800", + "metadata": {}, "source": [ "# Define features module\n", "\n", "This is the common data preprocessing step."
- ], - "id": "29ebd0ec7fc5b800" + ] }, { + "cell_type": "code", + "execution_count": 2, + "id": "7fafbffaf2f6f68a", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T06:57:19.627950Z", "start_time": "2024-12-07T06:57:19.368576Z" } }, - "cell_type": "code", - "source": [ - "%%cell_to_module features --display\n", - "\n", - "import pandas as pd\n", - "\n", - "def raw_data(path: str) -> pd.DataFrame:\n", - " return pd.read_csv(path)\n", - "\n", - "def transformed_data(raw_data: pd.DataFrame) -> pd.DataFrame:\n", - " return raw_data.dropna()" - ], - "id": "7fafbffaf2f6f68a", "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\ntransformed_data\n\ntransformed_data\nDataFrame\n\n\n\nraw_data\n\nraw_data\nDataFrame\n\n\n\nraw_data->transformed_data\n\n\n\n\n\n_raw_data_inputs\n\npath\nstr\n\n\n\n_raw_data_inputs->raw_data\n\n\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "transformed_data\n", + "\n", + "transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->transformed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_raw_data_inputs\n", + "\n", + "path\n", + "str\n", + "\n", + "\n", + "\n", + "_raw_data_inputs->raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -82,26 +140,143 @@ "output_type": "display_data" } ], - "execution_count": 2 + "source": [ + "%%cell_to_module features --display\n", + "\n", + "import pandas as pd\n", + "\n", + "def raw_data(path: str) -> pd.DataFrame:\n", + " return pd.read_csv(path)\n", + "\n", + "def transformed_data(raw_data: pd.DataFrame) -> pd.DataFrame:\n", + " return 
raw_data.dropna()" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "ee170ce894848eae", + "metadata": {}, "source": [ "# Define train module\n", "\n", "This is the training bit of the dataflow." - ], - "id": "ee170ce894848eae" + ] }, { + "cell_type": "code", + "execution_count": 3, + "id": "eae523c3fba37c93", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T06:57:19.971271Z", "start_time": "2024-12-07T06:57:19.724804Z" } }, - "cell_type": "code", + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "model\n", + "\n", + "\n", + "\n", + "model\n", + "RandomForest\n", + "\n", + "\n", + "\n", + "base_model\n", + "\n", + "base_model: model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "fit_model\n", + "\n", + "fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "base_model->fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_base_model_inputs\n", + "\n", + "model_params\n", + "dict\n", + "\n", + "\n", + "\n", + "_base_model_inputs->base_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_fit_model_inputs\n", + "\n", + "transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "_fit_model_inputs->fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%cell_to_module train --config '{\"model\":\"RandomForest\"}'--display\n", "\n", @@ -130,55 +305,84 @@ " \"\"\"Fit a model to transformed data.\"\"\"\n", " base_model.fit(transformed_data.drop(\"target\", axis=1), transformed_data[\"target\"])\n", " return base_model\n" - ], - "id": "eae523c3fba37c93", - "outputs": [ - { - "data": { - "image/svg+xml": 
"\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nmodel\n\n\n\nmodel\nRandomForest\n\n\n\nbase_model\n\nbase_model: model\ntyping.Any\n\n\n\nfit_model\n\nfit_model\ntyping.Any\n\n\n\nbase_model->fit_model\n\n\n\n\n\n_base_model_inputs\n\nmodel_params\ndict\n\n\n\n_base_model_inputs->base_model\n\n\n\n\n\n_fit_model_inputs\n\ntransformed_data\nDataFrame\n\n\n\n_fit_model_inputs->fit_model\n\n\n\n\n\nconfig\n\n\n\nconfig\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 3 + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "8cae5e1a9c682ea5", + "metadata": {}, "source": [ "# Define the inference module\n", "\n", "This houses what we need for inference." - ], - "id": "8cae5e1a9c682ea5" + ] }, { + "cell_type": "code", + "execution_count": 4, + "id": "2ad9e61062f6516a", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T06:57:20.363768Z", "start_time": "2024-12-07T06:57:20.114344Z" } }, - "cell_type": "code", - "source": [ - "%%cell_to_module inference --display\n", - "from typing import Any\n", - "import pandas as pd\n", - "\n", - "\n", - "def predicted_data(transformed_data: pd.DataFrame, fit_model: Any) -> pd.DataFrame:\n", - " return fit_model.predict(transformed_data)\n", - "\n" - ], - "id": "2ad9e61062f6516a", "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\n_predicted_data_inputs\n\ntransformed_data\nDataFrame\nfit_model\ntyping.Any\n\n\n\n_predicted_data_inputs->predicted_data\n\n\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "predicted_data\n", + "\n", + "predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "_predicted_data_inputs\n", + "\n", + 
"transformed_data\n", + "DataFrame\n", + "fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "_predicted_data_inputs->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -187,43 +391,188 @@ "output_type": "display_data" } ], - "execution_count": 4 + "source": [ + "%%cell_to_module inference --display\n", + "from typing import Any\n", + "import pandas as pd\n", + "\n", + "\n", + "def predicted_data(transformed_data: pd.DataFrame, fit_model: Any) -> pd.DataFrame:\n", + " return fit_model.predict(transformed_data)\n", + "\n" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "3a1a0d9aca3944b1", + "metadata": {}, "source": [ "# We can combine the modules independently with different drivers\n", "\n", "But this won't provide us with a single dataflow or DAG." - ], - "id": "3a1a0d9aca3944b1" + ] }, { + "cell_type": "code", + "execution_count": 9, + "id": "9ac29701bdd31fb5", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T18:08:40.538779Z", "start_time": "2024-12-07T18:08:39.642181Z" } }, - "cell_type": "code", - "source": [ - "# train\n", - "from hamilton import driver\n", - "\n", - "train_dr = (\n", - " driver.Builder()\n", - " .with_config({\"model\": \"RandomForest\", \"model_params\": {\"n_estimators\": 100}})\n", - " .with_modules(features, train, inference)\n", - " .build()\n", - ")\n", - "train_dr.display_all_functions()" - ], - "id": "9ac29701bdd31fb5", "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nmodel\n\n\n\nmodel\nRandomForest\n\n\n\nmodel_params\n\n\n\nmodel_params\n{'n_estimators': 100}\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\nbase_model\n\nbase_model: 
model\ntyping.Any\n\n\n\nfit_model\n\nfit_model\ntyping.Any\n\n\n\nbase_model->fit_model\n\n\n\n\n\ntransformed_data\n\ntransformed_data\nDataFrame\n\n\n\ntransformed_data->predicted_data\n\n\n\n\n\ntransformed_data->fit_model\n\n\n\n\n\nfit_model->predicted_data\n\n\n\n\n\nraw_data\n\nraw_data\nDataFrame\n\n\n\nraw_data->transformed_data\n\n\n\n\n\n_base_model_inputs\n\nmodel_params\ndict\n\n\n\n_base_model_inputs->base_model\n\n\n\n\n\n_raw_data_inputs\n\npath\nstr\n\n\n\n_raw_data_inputs->raw_data\n\n\n\n\n\nconfig\n\n\n\nconfig\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "model\n", + "\n", + "\n", + "\n", + "model\n", + "RandomForest\n", + "\n", + "\n", + "\n", + "model_params\n", + "\n", + "\n", + "\n", + "model_params\n", + "{'n_estimators': 100}\n", + "\n", + "\n", + "\n", + "predicted_data\n", + "\n", + "predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "base_model\n", + "\n", + "base_model: model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "fit_model\n", + "\n", + "fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "base_model->fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "transformed_data\n", + "\n", + "transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "transformed_data->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "transformed_data->fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "fit_model->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->transformed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_base_model_inputs\n", + "\n", + "model_params\n", + "dict\n", + "\n", + "\n", + "\n", + "_base_model_inputs->base_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_raw_data_inputs\n", + "\n", + "path\n", + 
"str\n", + "\n", + "\n", + "\n", + "_raw_data_inputs->raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -233,33 +582,122 @@ "output_type": "execute_result" } ], - "execution_count": 9 + "source": [ + "# train\n", + "from hamilton import driver\n", + "\n", + "train_dr = (\n", + " driver.Builder()\n", + " .with_config({\"model\": \"RandomForest\", \"model_params\": {\"n_estimators\": 100}})\n", + " .with_modules(features, train, inference)\n", + " .build()\n", + ")\n", + "train_dr.display_all_functions()" + ] }, { + "cell_type": "code", + "execution_count": 10, + "id": "cc9401ed081df22f", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T18:09:13.265102Z", "start_time": "2024-12-07T18:09:12.750662Z" } }, - "cell_type": "code", - "source": [ - "# Inference\n", - "from hamilton import driver\n", - "\n", - "inference_dr = (\n", - " driver.Builder()\n", - " .with_config({})\n", - " .with_modules(features, inference)\n", - " .build()\n", - ")\n", - "inference_dr.display_all_functions()" - ], - "id": "cc9401ed081df22f", "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\ntransformed_data\n\ntransformed_data\nDataFrame\n\n\n\ntransformed_data->predicted_data\n\n\n\n\n\nraw_data\n\nraw_data\nDataFrame\n\n\n\nraw_data->transformed_data\n\n\n\n\n\n_predicted_data_inputs\n\nfit_model\ntyping.Any\n\n\n\n_predicted_data_inputs->predicted_data\n\n\n\n\n\n_raw_data_inputs\n\npath\nstr\n\n\n\n_raw_data_inputs->raw_data\n\n\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "predicted_data\n", + "\n", 
+ "predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "transformed_data\n", + "\n", + "transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "transformed_data->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "raw_data\n", + "\n", + "raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "raw_data->transformed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_predicted_data_inputs\n", + "\n", + "fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "_predicted_data_inputs->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_raw_data_inputs\n", + "\n", + "path\n", + "str\n", + "\n", + "\n", + "\n", + "_raw_data_inputs->raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -269,11 +707,18 @@ "output_type": "execute_result" } ], - "execution_count": 10 + "source": [ + "# Inference\n", + "from hamilton import driver\n", + "\n", + "inference_dr = driver.Builder().with_config({}).with_modules(features, inference).build()\n", + "inference_dr.display_all_functions()" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "d85c51388733ce96", + "metadata": {}, "source": [ "# To combine into a single dataflow we can use @subdag\n", "\n", @@ -283,17 +728,330 @@ "2. then use the fit model to predict on a separate dataset.\n", "\n", "To do that we define another module that uses the `@subdag` constructs that we wire together." 
- ], - "id": "d85c51388733ce96" + ] }, { + "cell_type": "code", + "execution_count": 8, + "id": "6d1585dad64464d7", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T07:00:23.770491Z", "start_time": "2024-12-07T07:00:23.481869Z" } }, - "cell_type": "code", + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "model\n", + "\n", + "\n", + "\n", + "model\n", + "RandomForest\n", + "\n", + "\n", + "\n", + "trained_pipeline.base_model\n", + "\n", + "trained_pipeline.base_model: model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "trained_pipeline.fit_model\n", + "\n", + "trained_pipeline.fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "trained_pipeline.base_model->trained_pipeline.fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "fit_model\n", + "\n", + "fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "predicted_data.fit_model\n", + "\n", + "predicted_data.fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "fit_model->predicted_data.fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.raw_data\n", + "\n", + "predicted_data.raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.transformed_data\n", + "\n", + "predicted_data.transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.raw_data->predicted_data.transformed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.predicted_data\n", + "\n", + "predicted_data.predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.transformed_data->predicted_data.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.predicted_data\n", + "\n", + "trained_pipeline.predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline\n", + "\n", + "trained_pipeline\n", + "dict\n", + "\n", + "\n", + "\n", + 
"trained_pipeline.predicted_data->trained_pipeline\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.transformed_data\n", + "\n", + "trained_pipeline.transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline.transformed_data->trained_pipeline.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.transformed_data->trained_pipeline.fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data\n", + "\n", + "predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.predicted_data->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.model_params\n", + "\n", + "trained_pipeline.model_params\n", + "dict\n", + "\n", + "\n", + "\n", + "trained_pipeline.model_params->trained_pipeline.base_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.fit_model->trained_pipeline.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.fit_model->trained_pipeline\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.fit_model->predicted_data.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.path\n", + "\n", + "predicted_data.path\n", + "str\n", + "\n", + "\n", + "\n", + "predicted_data.path->predicted_data.raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.path\n", + "\n", + "trained_pipeline.path\n", + "str\n", + "\n", + "\n", + "\n", + "trained_pipeline.raw_data\n", + "\n", + "trained_pipeline.raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline.path->trained_pipeline.raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline->fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "training_prediction\n", + "\n", + "training_prediction\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline->training_prediction\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.raw_data->trained_pipeline.transformed_data\n", + 
"\n", + "\n", + "\n", + "\n", + "\n", + "_trained_pipeline.model_params_inputs\n", + "\n", + "model_params\n", + "dict\n", + "\n", + "\n", + "\n", + "_trained_pipeline.model_params_inputs->trained_pipeline.model_params\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_predicted_data.path_inputs\n", + "\n", + "predict_path\n", + "str\n", + "\n", + "\n", + "\n", + "_predicted_data.path_inputs->predicted_data.path\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_trained_pipeline.path_inputs\n", + "\n", + "path\n", + "str\n", + "\n", + "\n", + "\n", + "_trained_pipeline.path_inputs->trained_pipeline.path\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%cell_to_module pipeline --config '{\"model\":\"RandomForest\"}' --display\n", "from typing import Any\n", @@ -331,46 +1089,338 @@ ")\n", "def predicted_data(predicted_data: pd.DataFrame) -> pd.DataFrame:\n", " return predicted_data" - ], - "id": "6d1585dad64464d7", - "outputs": [ - { - "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nmodel\n\n\n\nmodel\nRandomForest\n\n\n\ntrained_pipeline.base_model\n\ntrained_pipeline.base_model: 
model\ntyping.Any\n\n\n\ntrained_pipeline.fit_model\n\ntrained_pipeline.fit_model\ntyping.Any\n\n\n\ntrained_pipeline.base_model->trained_pipeline.fit_model\n\n\n\n\n\nfit_model\n\nfit_model\ntyping.Any\n\n\n\npredicted_data.fit_model\n\npredicted_data.fit_model\ntyping.Any\n\n\n\nfit_model->predicted_data.fit_model\n\n\n\n\n\npredicted_data.raw_data\n\npredicted_data.raw_data\nDataFrame\n\n\n\npredicted_data.transformed_data\n\npredicted_data.transformed_data\nDataFrame\n\n\n\npredicted_data.raw_data->predicted_data.transformed_data\n\n\n\n\n\npredicted_data.predicted_data\n\npredicted_data.predicted_data\nDataFrame\n\n\n\npredicted_data.transformed_data->predicted_data.predicted_data\n\n\n\n\n\ntrained_pipeline.predicted_data\n\ntrained_pipeline.predicted_data\nDataFrame\n\n\n\ntrained_pipeline\n\ntrained_pipeline\ndict\n\n\n\ntrained_pipeline.predicted_data->trained_pipeline\n\n\n\n\n\ntrained_pipeline.transformed_data\n\ntrained_pipeline.transformed_data\nDataFrame\n\n\n\ntrained_pipeline.transformed_data->trained_pipeline.predicted_data\n\n\n\n\n\ntrained_pipeline.transformed_data->trained_pipeline.fit_model\n\n\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\npredicted_data.predicted_data->predicted_data\n\n\n\n\n\ntrained_pipeline.model_params\n\ntrained_pipeline.model_params\ndict\n\n\n\ntrained_pipeline.model_params->trained_pipeline.base_model\n\n\n\n\n\ntrained_pipeline.fit_model->trained_pipeline.predicted_data\n\n\n\n\n\ntrained_pipeline.fit_model->trained_pipeline\n\n\n\n\n\npredicted_data.fit_model->predicted_data.predicted_data\n\n\n\n\n\npredicted_data.path\n\npredicted_data.path\nstr\n\n\n\npredicted_data.path->predicted_data.raw_data\n\n\n\n\n\ntrained_pipeline.path\n\ntrained_pipeline.path\nstr\n\n\n\ntrained_pipeline.raw_data\n\ntrained_pipeline.raw_data\nDataFrame\n\n\n\ntrained_pipeline.path->trained_pipeline.raw_data\n\n\n\n\n\ntrained_pipeline->fit_model\n\n\n\n\n\ntraining_prediction\n\ntraining_prediction\nDataFrame\n\n\n\ntrained
_pipeline->training_prediction\n\n\n\n\n\ntrained_pipeline.raw_data->trained_pipeline.transformed_data\n\n\n\n\n\n_trained_pipeline.model_params_inputs\n\nmodel_params\ndict\n\n\n\n_trained_pipeline.model_params_inputs->trained_pipeline.model_params\n\n\n\n\n\n_predicted_data.path_inputs\n\npredict_path\nstr\n\n\n\n_predicted_data.path_inputs->predicted_data.path\n\n\n\n\n\n_trained_pipeline.path_inputs\n\npath\nstr\n\n\n\n_trained_pipeline.path_inputs->trained_pipeline.path\n\n\n\n\n\nconfig\n\n\n\nconfig\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 8 + ] }, { + "cell_type": "code", + "execution_count": 6, + "id": "f72146c07a654ca4", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T06:57:20.874962Z", "start_time": "2024-12-07T06:57:20.643256Z" } }, - "cell_type": "code", - "source": [ - "from hamilton import driver\n", - "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_config({\"model\": \"RandomForest\", \"model_params\": {\"n_estimators\": 100}})\n", - " .with_modules(pipeline)\n", - " .build()\n", - ")\n", - "dr.display_all_functions()" - ], - "id": "f72146c07a654ca4", "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nmodel\n\n\n\nmodel\nRandomForest\n\n\n\nmodel_params\n\n\n\nmodel_params\n{'n_estimators': 100}\n\n\n\ntrained_pipeline.base_model\n\ntrained_pipeline.base_model: 
model\ntyping.Any\n\n\n\ntrained_pipeline.fit_model\n\ntrained_pipeline.fit_model\ntyping.Any\n\n\n\ntrained_pipeline.base_model->trained_pipeline.fit_model\n\n\n\n\n\nfit_model\n\nfit_model\ntyping.Any\n\n\n\npredicted_data.fit_model\n\npredicted_data.fit_model\ntyping.Any\n\n\n\nfit_model->predicted_data.fit_model\n\n\n\n\n\ntrained_pipeline.model\n\ntrained_pipeline.model\nUpstreamDependency\n\n\n\npredicted_data.raw_data\n\npredicted_data.raw_data\nDataFrame\n\n\n\npredicted_data.transformed_data\n\npredicted_data.transformed_data\nDataFrame\n\n\n\npredicted_data.raw_data->predicted_data.transformed_data\n\n\n\n\n\npredicted_data.predicted_data\n\npredicted_data.predicted_data\nDataFrame\n\n\n\npredicted_data.transformed_data->predicted_data.predicted_data\n\n\n\n\n\ntrained_pipeline.predicted_data\n\ntrained_pipeline.predicted_data\nDataFrame\n\n\n\ntrained_pipeline\n\ntrained_pipeline\ndict\n\n\n\ntrained_pipeline.predicted_data->trained_pipeline\n\n\n\n\n\ntrained_pipeline.transformed_data\n\ntrained_pipeline.transformed_data\nDataFrame\n\n\n\ntrained_pipeline.transformed_data->trained_pipeline.predicted_data\n\n\n\n\n\ntrained_pipeline.transformed_data->trained_pipeline.fit_model\n\n\n\n\n\npredicted_data\n\npredicted_data\nDataFrame\n\n\n\npredicted_data.predicted_data->predicted_data\n\n\n\n\n\ntrained_pipeline.model_params\n\ntrained_pipeline.model_params\ndict\n\n\n\ntrained_pipeline.model_params->trained_pipeline.base_model\n\n\n\n\n\ntrained_pipeline.fit_model->trained_pipeline.predicted_data\n\n\n\n\n\ntrained_pipeline.fit_model->trained_pipeline\n\n\n\n\n\npredicted_data.fit_model->predicted_data.predicted_data\n\n\n\n\n\npredicted_data.path\n\npredicted_data.path\nstr\n\n\n\npredicted_data.path->predicted_data.raw_data\n\n\n\n\n\ntrained_pipeline.path\n\ntrained_pipeline.path\nstr\n\n\n\ntrained_pipeline.raw_data\n\ntrained_pipeline.raw_data\nDataFrame\n\n\n\ntrained_pipeline.path->trained_pipeline.raw_data\n\n\n\n\n\ntrained_pipeline->fit_model\n\n
\n\n\n\ntraining_prediction\n\ntraining_prediction\nDataFrame\n\n\n\ntrained_pipeline->training_prediction\n\n\n\n\n\ntrained_pipeline.raw_data->trained_pipeline.transformed_data\n\n\n\n\n\n_trained_pipeline.model_params_inputs\n\nmodel_params\ndict\n\n\n\n_trained_pipeline.model_params_inputs->trained_pipeline.model_params\n\n\n\n\n\n_predicted_data.path_inputs\n\npredict_path\nstr\n\n\n\n_predicted_data.path_inputs->predicted_data.path\n\n\n\n\n\n_trained_pipeline.path_inputs\n\npath\nstr\n\n\n\n_trained_pipeline.path_inputs->trained_pipeline.path\n\n\n\n\n\nconfig\n\n\n\nconfig\n\n\n\ninput\n\ninput\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "model\n", + "\n", + "\n", + "\n", + "model\n", + "RandomForest\n", + "\n", + "\n", + "\n", + "model_params\n", + "\n", + "\n", + "\n", + "model_params\n", + "{'n_estimators': 100}\n", + "\n", + "\n", + "\n", + "trained_pipeline.base_model\n", + "\n", + "trained_pipeline.base_model: model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "trained_pipeline.fit_model\n", + "\n", + "trained_pipeline.fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "trained_pipeline.base_model->trained_pipeline.fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "fit_model\n", + "\n", + "fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "predicted_data.fit_model\n", + "\n", + "predicted_data.fit_model\n", + "typing.Any\n", + "\n", + "\n", + "\n", + "fit_model->predicted_data.fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.model\n", + "\n", + "trained_pipeline.model\n", + "UpstreamDependency\n", + "\n", + "\n", + "\n", + "predicted_data.raw_data\n", + "\n", + "predicted_data.raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.transformed_data\n", + "\n", + "predicted_data.transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + 
"predicted_data.raw_data->predicted_data.transformed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.predicted_data\n", + "\n", + "predicted_data.predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.transformed_data->predicted_data.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.predicted_data\n", + "\n", + "trained_pipeline.predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline\n", + "\n", + "trained_pipeline\n", + "dict\n", + "\n", + "\n", + "\n", + "trained_pipeline.predicted_data->trained_pipeline\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.transformed_data\n", + "\n", + "trained_pipeline.transformed_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline.transformed_data->trained_pipeline.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.transformed_data->trained_pipeline.fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data\n", + "\n", + "predicted_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "predicted_data.predicted_data->predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.model_params\n", + "\n", + "trained_pipeline.model_params\n", + "dict\n", + "\n", + "\n", + "\n", + "trained_pipeline.model_params->trained_pipeline.base_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.fit_model->trained_pipeline.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.fit_model->trained_pipeline\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.fit_model->predicted_data.predicted_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "predicted_data.path\n", + "\n", + "predicted_data.path\n", + "str\n", + "\n", + "\n", + "\n", + "predicted_data.path->predicted_data.raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.path\n", + "\n", + "trained_pipeline.path\n", + "str\n", + "\n", + "\n", 
+ "\n", + "trained_pipeline.raw_data\n", + "\n", + "trained_pipeline.raw_data\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline.path->trained_pipeline.raw_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline->fit_model\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "training_prediction\n", + "\n", + "training_prediction\n", + "DataFrame\n", + "\n", + "\n", + "\n", + "trained_pipeline->training_prediction\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "trained_pipeline.raw_data->trained_pipeline.transformed_data\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_trained_pipeline.model_params_inputs\n", + "\n", + "model_params\n", + "dict\n", + "\n", + "\n", + "\n", + "_trained_pipeline.model_params_inputs->trained_pipeline.model_params\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_predicted_data.path_inputs\n", + "\n", + "predict_path\n", + "str\n", + "\n", + "\n", + "\n", + "_predicted_data.path_inputs->predicted_data.path\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "_trained_pipeline.path_inputs\n", + "\n", + "path\n", + "str\n", + "\n", + "\n", + "\n", + "_trained_pipeline.path_inputs->trained_pipeline.path\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "config\n", + "\n", + "\n", + "\n", + "input\n", + "\n", + "input\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -380,32 +1430,42 @@ "output_type": "execute_result" } ], - "execution_count": 6 + "source": [ + "from hamilton import driver\n", + "\n", + "dr = (\n", + " driver.Builder()\n", + " .with_config({\"model\": \"RandomForest\", \"model_params\": {\"n_estimators\": 100}})\n", + " .with_modules(pipeline)\n", + " .build()\n", + ")\n", + "dr.display_all_functions()" + ] }, { + "cell_type": "code", + "execution_count": 7, + "id": "b3abca24b1a86329", "metadata": { "ExecuteTime": { "end_time": "2024-12-07T06:57:34.959772Z", "start_time": "2024-12-07T06:57:34.956204Z" } }, - 
"cell_type": "code", + "outputs": [], "source": [ "# this wont work because we don't actually have data...\n", - "# dr.execute([\"trained_pipeline\", \"predicted_data\"], \n", + "# dr.execute([\"trained_pipeline\", \"predicted_data\"],\n", "# inputs={\"path\": \"data.csv\", \"predict_path\": \"data.csv\"})" - ], - "id": "b3abca24b1a86329", - "outputs": [], - "execution_count": 7 + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, - "source": "", - "id": "1b3dba37a6c00d7c" + "id": "1b3dba37a6c00d7c", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb b/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb index 80ee07a76..5349244f6 100644 --- a/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb +++ b/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb @@ -16,7 +16,8 @@ "id": "7c1fa180", "metadata": {}, "source": [ - "# Hamilton for ML dataflows [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb)\n", + "# Hamilton for ML dataflows [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/scikit-learn/Hamilton_for_ML_dataflows.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\model_examples\\scikit-learn\\Hamilton_for_ML_dataflows.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\model_examples\\scikit-learn\\Hamilton_for_ML_dataflows.ipynb)\n", + "\n", "\n", "\n", "#### Requirements:\n", @@ -194,8 +195,8 @@ } ], "source": [ - "_data_set = 'digits' # the data set to load\n", - "_model_type = 'logistic' # the model type to fit and evaluate with\n", + "_data_set = \"digits\" # the data set to load\n", + "_model_type = \"logistic\" # the model type to fit and evaluate with\n", "\n", "dag_config = {\n", " \"test_size_fraction\": 0.5,\n", @@ -530,8 +531,11 @@ } ], "source": [ - "dr.visualize_execution(['classification_report', 'confusion_matrix', 'fit_clf'],\n", - " f'./model_dag_{_data_set}_{_model_type}.dot', {\"format\": \"png\"})" + "dr.visualize_execution(\n", + " [\"classification_report\", \"confusion_matrix\", \"fit_clf\"],\n", + " f\"./model_dag_{_data_set}_{_model_type}.dot\",\n", + " {\"format\": \"png\"},\n", + ")" ] } ], diff --git a/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb b/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb index 2214a641c..c718aab9c 100644 --- a/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb +++ b/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb @@ -16,7 +16,8 @@ "id": "67a331ba", "metadata": {}, "source": [ - "# Hamilton - Time Series model [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb)\n", + "# Hamilton - Time Series model [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/model_examples/time-series/Hamilton-TimeSeriesmodel.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\model_examples\\time-series\\Hamilton-TimeSeriesmodel.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\model_examples\\time-series\\Hamilton-TimeSeriesmodel.ipynb)\n", + "\n", "\n", "\n", "#### Requirements:\n", diff --git a/examples/mutate/abstract_functionality_blueprint/notebook.ipynb b/examples/mutate/abstract_functionality_blueprint/notebook.ipynb index f6061ea4c..5df10e688 100644 --- a/examples/mutate/abstract_functionality_blueprint/notebook.ipynb +++ b/examples/mutate/abstract_functionality_blueprint/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Using `mutate` for pre-processing and feature engineering [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/mutate/abstract_functionality_blueprint/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/mutate/abstract_functionality_blueprint/notebook.ipynb)\n", + "# Using 
`mutate` for pre-processing and feature engineering [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/mutate/abstract_functionality_blueprint/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/mutate/abstract_functionality_blueprint/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\mutate\\abstract_functionality_blueprint\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\mutate\\abstract_functionality_blueprint\\notebook.ipynb)\n", + "\n", "\n", "\n", "We give some application suggestions for mutating the outputs of functions in a distributed manner with `@mutate.`\n", @@ -1129,12 +1130,12 @@ "metadata": {}, "outputs": [], "source": [ - "from hamilton import base\n", - "from hamilton_sdk import adapters\n", - "from hamilton.plugins.h_ray import RayGraphAdapter,RayTaskExecutor\n", + "import mutate\n", "import ray\n", "\n", - "import mutate\n", + "from hamilton import base\n", + "from hamilton.plugins.h_ray import RayGraphAdapter, RayTaskExecutor\n", + "from hamilton_sdk import adapters\n", "\n", "remote_executor = RayTaskExecutor(num_cpus=4)\n", "shutdown = ray.shutdown\n", @@ -1150,10 +1151,10 @@ "outputs": [], "source": [ "tracker_ray = adapters.HamiltonTracker(\n", - " project_id=project_id,\n", - " username=username,\n", - " dag_name=\"mutate ray graph adapter\",\n", - " )\n", + " project_id=project_id,\n", + " username=username,\n", + " dag_name=\"mutate ray graph adapter\",\n", + ")\n", "ray.init()\n", "rga = RayGraphAdapter(result_builder=base.DictResult(), shutdown_ray_on_completion=True)\n", "dr = 
driver.Builder().with_modules(mutate).with_adapters(rga, tracker_ray).build()\n", @@ -1168,23 +1169,23 @@ "outputs": [], "source": [ "tracker_ray = adapters.HamiltonTracker(\n", - " project_id=project_id,\n", - " username=username,\n", - " dag_name=\"mutate ray task executor\",\n", - " )\n", + " project_id=project_id,\n", + " username=username,\n", + " dag_name=\"mutate ray task executor\",\n", + ")\n", "\n", "dr = (\n", - " driver.Builder()\n", - " .enable_dynamic_execution(allow_experimental_mode=True)\n", - " .with_modules(mutate)\n", - " .with_remote_executor(remote_executor)\n", - " .with_adapters(tracker_ray)\n", - " .build()\n", - " )\n", + " driver.Builder()\n", + " .enable_dynamic_execution(allow_experimental_mode=True)\n", + " .with_modules(mutate)\n", + " .with_remote_executor(remote_executor)\n", + " .with_adapters(tracker_ray)\n", + " .build()\n", + ")\n", "\n", "print(dr.execute(final_vars=[\"data_1\", \"data_2\", \"feat_A\"]))\n", "if shutdown is not None:\n", - " shutdown()\n" + " shutdown()" ] }, { diff --git a/examples/narwhals/notebook.ipynb b/examples/narwhals/notebook.ipynb index a0fce5ab1..137a0b1a3 100644 --- a/examples/narwhals/notebook.ipynb +++ b/examples/narwhals/notebook.ipynb @@ -16,7 +16,8 @@ "id": "d1222755", "metadata": {}, "source": [ - "# run me in google colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/narwhals/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/narwhals/notebook.ipynb)\n", + "# run me in google colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/narwhals/notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/narwhals/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\narwhals\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\narwhals\\notebook.ipynb)\n", + "\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/narwhals/notebook.ipynb)" ] @@ -64,10 +65,9 @@ }, "outputs": [], "source": [ - "config = {\n", - " \"mode\": \"pandas\"\n", - "}\n", + "config = {\"mode\": \"pandas\"}\n", "from hamilton import driver\n", + "\n", "builder = driver.Builder()" ] }, @@ -297,6 +297,7 @@ "source": [ "from hamilton import base, driver\n", "from hamilton.plugins import h_narwhals, h_polars\n", + "\n", "# pandas\n", "dr = (\n", " driver.Builder()\n", @@ -365,7 +366,7 @@ " )\n", " .build()\n", ")\n", - "result= dr.execute([example.group_by_mean, example.example1], inputs={\"col_name\": \"a\"})\n", + "result = dr.execute([example.group_by_mean, example.example1], inputs={\"col_name\": \"a\"})\n", "result" ] }, diff --git a/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb b/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb index c75343bd9..6da5bf5f1 100644 --- a/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb +++ b/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Air quality analysis with numpy [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb)\n" + "# Air quality analysis with numpy [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/numpy/air-quality-analysis/hamilton_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\numpy\\air-quality-analysis\\hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\numpy\\air-quality-analysis\\hamilton_notebook.ipynb)\n" ] }, { @@ -26,15 +26,14 @@ "# Cell 1 - import the things you need\n", "import logging\n", "import sys\n", - "from functools import partial\n", "import typing\n", + "from functools import partial\n", "\n", "import numpy as np\n", - "import pandas as pd\n", "from numpy.random import default_rng\n", "from scipy import stats\n", "\n", - "from hamilton import ad_hoc_utils, driver, base\n", + "from hamilton import ad_hoc_utils, base, driver\n", "\n", "logging.basicConfig(stream=sys.stdout)" ] @@ -56,7 +55,7 @@ "# import the function modules you want to reload when they change.\n", "# i.e. these should be your modules you write your functions in. 
As you change them,\n", "# they will be reimported without you having to do anything.\n", - "# %aimport analysis_flow " + "# %aimport analysis_flow" ] }, { @@ -67,6 +66,7 @@ "source": [ "# Some helper functions for the calculations\n", "\n", + "\n", "def _moving_mean(a, n):\n", " \"\"\"Computes the moving mean using numpy constructs.\"\"\"\n", " ret = np.cumsum(a, dtype=float, axis=0)\n", @@ -140,7 +140,7 @@ " var = np.var(diff, ddof=1)\n", " num = np.mean(diff)\n", " denom = np.sqrt(var / len(x))\n", - " return np.divide(num, denom)\n" + " return np.divide(num, denom)" ] }, { @@ -158,9 +158,7 @@ "\n", "def pollutant_data(input_file_name: str = \"air-quality-data.csv\") -> np.ndarray:\n", " \"\"\"Returns the raw pollutant data.\"\"\"\n", - " return np.loadtxt(\n", - " input_file_name, dtype=float, delimiter=\",\", skiprows=1, usecols=range(1, 8)\n", - " )\n", + " return np.loadtxt(input_file_name, dtype=float, delimiter=\",\", skiprows=1, usecols=range(1, 8))\n", "\n", "\n", "def pollutants_A(pollutant_data: np.ndarray) -> np.ndarray:\n", @@ -178,9 +176,7 @@ " return _moving_mean(pollutants_A, 24)\n", "\n", "\n", - "def pollutants_B_8hr_avg(\n", - " pollutants_B: np.ndarray, pollutants_A_24hr_avg: np.ndarray\n", - ") -> np.ndarray:\n", + "def pollutants_B_8hr_avg(pollutants_B: np.ndarray, pollutants_A_24hr_avg: np.ndarray) -> np.ndarray:\n", " \"\"\"8 hour move average of pollutant B.\n", " To make sure both the sets are of the same length, we will truncate the pollutants_B_8hr_avg according to\n", " the length of pollutants_A_24hr_avg. 
This will also ensure we have concentrations for all the pollutants\n", @@ -189,9 +185,7 @@ " return _moving_mean(pollutants_B, 8)[-(pollutants_A_24hr_avg.shape[0]) :]\n", "\n", "\n", - "def pollutants(\n", - " pollutants_A_24hr_avg: np.ndarray, pollutants_B_8hr_avg: np.ndarray\n", - ") -> np.ndarray:\n", + "def pollutants(pollutants_A_24hr_avg: np.ndarray, pollutants_B_8hr_avg: np.ndarray) -> np.ndarray:\n", " \"\"\"Concatenates Pollutants A and Pollutants B.\n", "\n", " Now, we can join both sets with np.concatenate to form a single data set of all the averaged concentrations.\n", @@ -222,9 +216,7 @@ " }\n", "\n", "\n", - "def sub_indices(\n", - " pollutants: np.ndarray, breakpoints: dict, AQI: np.ndarray\n", - ") -> np.ndarray:\n", + "def sub_indices(pollutants: np.ndarray, breakpoints: dict, AQI: np.ndarray) -> np.ndarray:\n", " \"\"\"Return sub indicies.\n", "\n", " The subindices for each pollutant are calculated according to the linear relationship between the AQI and standard\n", @@ -284,14 +276,12 @@ "# Finally we'll do the t_test and p_value calculations in thier own module\n", "\n", "\n", - "def datetime_index(\n", - " pollutants_A_24hr_avg: np.ndarray, input_file_name: str\n", - ") -> np.ndarray:\n", + "def datetime_index(pollutants_A_24hr_avg: np.ndarray, input_file_name: str) -> np.ndarray:\n", " \"\"\"We will now import the datetime column from our original dataset into a datetime64 dtype array.\n", " We will use this array to index the AQI array and obtain subsets of the dataset.\"\"\"\n", - " return np.loadtxt(\n", - " input_file_name, dtype=\"M8[h]\", delimiter=\",\", skiprows=1, usecols=(0,)\n", - " )[-(pollutants_A_24hr_avg.shape[0]) :]\n", + " return np.loadtxt(input_file_name, dtype=\"M8[h]\", delimiter=\",\", skiprows=1, usecols=(0,))[\n", + " -(pollutants_A_24hr_avg.shape[0]) :\n", + " ]\n", "\n", "\n", "def after_lock(\n", @@ -734,7 +724,7 @@ "# To visualize do `pip install \"sf-hamilton[visualization]\"` if you want these to work\n", "\n", 
"# visualize all possible functions\n", - "dr.display_all_functions() # no args needed for a notebook." + "dr.display_all_functions() # no args needed for a notebook." ] }, { @@ -1099,8 +1089,8 @@ ], "source": [ "sample_matrix = base.NumpyMatrixResult().build_result(\n", - " before_sample=result[\"before_sample\"], after_sample=result[\"after_sample\"]\n", - " )\n", + " before_sample=result[\"before_sample\"], after_sample=result[\"after_sample\"]\n", + ")\n", "print(sample_matrix)" ] }, diff --git a/examples/openlineage/notebook.ipynb b/examples/openlineage/notebook.ipynb index 3a9b95987..4cbf97403 100644 --- a/examples/openlineage/notebook.ipynb +++ b/examples/openlineage/notebook.ipynb @@ -16,7 +16,8 @@ "id": "f937d835", "metadata": {}, "source": [ - "# OpenLineage example pipeline [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/openlineage/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/openlineage/notebook.ipynb)\n", + "# OpenLineage example pipeline [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/openlineage/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/openlineage/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\openlineage\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\openlineage\\notebook.ipynb)\n", + "\n", "\n", "\n", "This is a simple example of a pipeline that reads data from a file and 
a database, joins them, fits a model, and saves the model to a file and the joined data to a database. The pipeline does not import open lineage, and doesn't need to know about it. The salient point is that metadata is exposed by the data loading and data\n", @@ -635,10 +636,11 @@ } ], "source": [ - "from hamilton.plugins import h_openlineage\n", + "import pipeline\n", + "\n", "from hamilton import driver\n", + "from hamilton.plugins import h_openlineage\n", "\n", - "import pipeline\n", "ola = h_openlineage.OpenLineageAdapter(client, \"demo_namespace\", \"my_hamilton_job\")\n", "\n", "# create the DAG\n", @@ -661,6 +663,7 @@ "source": [ "# create inputs to run the DAG\n", "import sqlite3\n", + "\n", "db_client = sqlite3.connect(\"purchase_data.db\")\n", "\n", "# execute & emit lineage\n", diff --git a/examples/pandas/materialization/notebook.ipynb b/examples/pandas/materialization/notebook.ipynb index 601115b24..eae61c3bd 100644 --- a/examples/pandas/materialization/notebook.ipynb +++ b/examples/pandas/materialization/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Pandas materialization [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/materialization/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/pandas/materialization/notebook.ipynb)\n" + "# Pandas materialization [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/materialization/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/pandas/materialization/notebook.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\pandas\\materialization\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\pandas\\materialization\\notebook.ipynb)\n" ] }, { @@ -29,13 +29,12 @@ "outputs": [], "source": [ "import sqlite3\n", - "import sys\n", + "\n", "import pandas as pd\n", "\n", "# Add the hamilton module to your path - optional\n", "# project_dir = \"### ADD PATH HERE ###\"\n", "# sys.path.append(project_dir)\n", - "\n", "from hamilton import base, driver\n", "from hamilton.io.materialization import to" ] diff --git a/examples/pandas/split-apply-combine/notebook.ipynb b/examples/pandas/split-apply-combine/notebook.ipynb index 94b134d95..f0547e5ad 100644 --- a/examples/pandas/split-apply-combine/notebook.ipynb +++ b/examples/pandas/split-apply-combine/notebook.ipynb @@ -16,7 +16,8 @@ "id": "0b1abf89", "metadata": {}, "source": [ - "# Run me in google colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb)\n", + "# Run me in google colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\pandas\\split-apply-combine\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\pandas\\split-apply-combine\\notebook.ipynb)\n", + "\n", "\n", "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/split-apply-combine/notebook.ipynb)" ] @@ -574,7 +575,6 @@ " lifecycle.FunctionInputOutputTypeChecker(),\n", " # this will make execute return a pandas dataframe as a result\n", " base.PandasDataFrameResult(),\n", - " \n", " )\n", " .build()\n", ")\n", @@ -597,7 +597,7 @@ " @staticmethod\n", " def visualize():\n", " # To visualize do `pip install \"sf-hamilton[visualization]\"` if you want these to work\n", - " return driver.display_all_functions()\n" + " return driver.display_all_functions()" ] }, { @@ -865,9 +865,9 @@ ], "source": [ "driver.execute(\n", - " inputs={\"input\": input, \"tax_rates\": tax_rates, \"tax_credits\": tax_credits},\n", - " final_vars=[\"final_tax_dataframe\"],\n", - " )" + " inputs={\"input\": input, \"tax_rates\": tax_rates, \"tax_credits\": tax_credits},\n", + " final_vars=[\"final_tax_dataframe\"],\n", + ")" ] }, { diff --git a/examples/pandas/with_columns/notebook.ipynb b/examples/pandas/with_columns/notebook.ipynb index 94953a288..a01da52a9 100644 --- a/examples/pandas/with_columns/notebook.ipynb +++ b/examples/pandas/with_columns/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example of using with_columns for Pandas [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/with_columns/notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/pandas/with_columns/notebook.ipynb)\n", + "# Example of using with_columns for Pandas [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/pandas/with_columns/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/pandas/with_columns/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\pandas\\with_columns\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\pandas\\with_columns\\notebook.ipynb)\n", + "\n", "\n", "This allows you to efficiently run groups of map operations on a dataframe.\n", "Here's an example of calling it -- if you've seen `@subdag`, you should be familiar with the concepts." 
@@ -36,10 +37,11 @@ ], "source": [ "%reload_ext hamilton.plugins.jupyter_magic\n", - "from hamilton import driver\n", "import my_functions\n", "\n", - "my_builder = driver.Builder().with_modules(my_functions).with_config({\"case\":\"thousands\"})\n", + "from hamilton import driver\n", + "\n", + "my_builder = driver.Builder().with_modules(my_functions).with_config({\"case\": \"thousands\"})\n", "output_node = [\"final_df\"]" ] }, @@ -572,9 +574,15 @@ ], "source": [ "import with_columns_example\n", - "dr = driver.Builder().with_modules(my_functions, with_columns_example).with_config({\"case\":\"millions\"}).build()\n", + "\n", + "dr = (\n", + " driver.Builder()\n", + " .with_modules(my_functions, with_columns_example)\n", + " .with_config({\"case\": \"millions\"})\n", + " .build()\n", + ")\n", "print(dr.execute(final_vars=[\"final_df\"])[\"final_df\"])\n", - "dr.visualize_execution(final_vars=[\"final_df\"])\n" + "dr.visualize_execution(final_vars=[\"final_df\"])" ] }, { @@ -658,19 +666,25 @@ ], "source": [ "import asyncio\n", - "from hamilton import async_driver\n", + "\n", "import with_columns_async\n", "\n", + "from hamilton import async_driver\n", + "\n", + "\n", "async def main():\n", " await asyncio.sleep(2)\n", - " dr = (await async_driver.Builder()\n", - " .with_modules(with_columns_async)\n", - " .with_config({\"case\":\"millions\"})\n", - " .build())\n", + " dr = (\n", + " await async_driver.Builder()\n", + " .with_modules(with_columns_async)\n", + " .with_config({\"case\": \"millions\"})\n", + " .build()\n", + " )\n", " results = await dr.execute([\"final_df\"])\n", " print(results[\"final_df\"])\n", "\n", - "await main()\n" + "\n", + "await main()" ] }, { diff --git a/examples/parallelism/file_processing/notebook.ipynb b/examples/parallelism/file_processing/notebook.ipynb index cd30a4e30..2295cb0da 100644 --- a/examples/parallelism/file_processing/notebook.ipynb +++ b/examples/parallelism/file_processing/notebook.ipynb @@ -16,7 +16,7 @@ "id": "8c6ee201", 
"metadata": {}, "source": [ - "# Parallelism: file processing [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/file_processing/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/file_processing/notebook.ipynb)\n" + "# Parallelism: file processing [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/file_processing/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/file_processing/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\parallelism\\file_processing\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\parallelism\\file_processing\\notebook.ipynb)\n" ] }, { @@ -35,9 +35,10 @@ } ], "source": [ - "import matplotlib\n", - "import pandas as pd\n", - "import aggregate_data, list_data, process_data\n", + "import aggregate_data\n", + "import list_data\n", + "import process_data\n", + "\n", "from hamilton import driver\n", "from hamilton.execution import executors" ] @@ -67,12 +68,14 @@ } ], "source": [ - "dr = driver.Builder(). 
\\\n", - " enable_dynamic_execution(allow_experimental_mode=True) \\\n", - " .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=100)) \\\n", - " .with_local_executor(executors.SynchronousLocalTaskExecutor()) \\\n", - " .with_modules(aggregate_data, list_data, process_data) \\\n", - " .build()" + "dr = (\n", + " driver.Builder()\n", + " .enable_dynamic_execution(allow_experimental_mode=True)\n", + " .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=100))\n", + " .with_local_executor(executors.SynchronousLocalTaskExecutor())\n", + " .with_modules(aggregate_data, list_data, process_data)\n", + " .build()\n", + ")" ] }, { @@ -422,7 +425,7 @@ } ], "source": [ - "dr.visualize_execution([\"statistics_by_city\"], \"./dag\", {}, inputs={\"data_dir\" : \"data\"})" + "dr.visualize_execution([\"statistics_by_city\"], \"./dag\", {}, inputs={\"data_dir\": \"data\"})" ] }, { @@ -432,7 +435,7 @@ "metadata": {}, "outputs": [], "source": [ - "df = dr.execute([\"statistics_by_city\"], inputs={\"data_dir\" : \"data\"})[\"statistics_by_city\"]\n", + "df = dr.execute([\"statistics_by_city\"], inputs={\"data_dir\": \"data\"})[\"statistics_by_city\"]\n", "df" ] }, @@ -443,9 +446,9 @@ "metadata": {}, "outputs": [], "source": [ - "df[\"mean_guest_satisfaction\"]\\\n", - " .sort_values(ascending=False)\\\n", - " .plot(kind=\"bar\", title=\"Guest satisfaction by city\", ylim=(80, 100))" + "df[\"mean_guest_satisfaction\"].sort_values(ascending=False).plot(\n", + " kind=\"bar\", title=\"Guest satisfaction by city\", ylim=(80, 100)\n", + ")" ] }, { @@ -455,8 +458,7 @@ "metadata": {}, "outputs": [], "source": [ - "df[\"mean_price\"]\\\n", - " .sort_values(ascending=False).plot(kind=\"bar\")" + "df[\"mean_price\"].sort_values(ascending=False).plot(kind=\"bar\")" ] }, { @@ -477,9 +479,7 @@ "metadata": {}, "outputs": [], "source": [ - "df[\"mean_price_per_person\"]\\\n", - " .sort_values(ascending=False)\\\n", - " .plot(kind=\"bar\")" + 
"df[\"mean_price_per_person\"].sort_values(ascending=False).plot(kind=\"bar\")" ] }, { @@ -489,9 +489,7 @@ "metadata": {}, "outputs": [], "source": [ - "df[\"cleanliness_ratings_mean\"]\\\n", - " .sort_values(ascending=False)\\\n", - " .plot(kind=\"bar\", ylim=(8,10))" + "df[\"cleanliness_ratings_mean\"].sort_values(ascending=False).plot(kind=\"bar\", ylim=(8, 10))" ] }, { diff --git a/examples/parallelism/lazy_threadpool_execution/notebook.ipynb b/examples/parallelism/lazy_threadpool_execution/notebook.ipynb index a1253012c..3c36e02ea 100644 --- a/examples/parallelism/lazy_threadpool_execution/notebook.ipynb +++ b/examples/parallelism/lazy_threadpool_execution/notebook.ipynb @@ -14,21 +14,23 @@ ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "# run me in google colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/lazy_threadpool_execution/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/lazy_threadpool_execution/notebook.ipynb)\n", - "id": "7b55978b426b6e42" + "id": "7b55978b426b6e42", + "metadata": {}, + "source": [ + "# run me in google colab [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/lazy_threadpool_execution/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/lazy_threadpool_execution/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\parallelism\\lazy_threadpool_execution\\notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\parallelism\\lazy_threadpool_execution\\notebook.ipynb)\n" + ] }, { + "cell_type": "code", + "execution_count": 1, + "id": "a1f2f8937a0b0488", "metadata": { "ExecuteTime": { "end_time": "2025-01-02T05:00:01.808537Z", "start_time": "2025-01-02T04:59:53.847872Z" } }, - "cell_type": "code", - "source": "%load_ext hamilton.plugins.jupyter_magic", - "id": "a1f2f8937a0b0488", "outputs": [ { "name": "stderr", @@ -39,25 +41,176 @@ ] } ], - "execution_count": 1 + "source": [ + "%load_ext hamilton.plugins.jupyter_magic" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "32c01561386fc348", + "metadata": {}, "source": [ "# Create a module with some functions\n", "This hopefully shows a good example of what could be parallelized given the structure of the DAG." - ], - "id": "32c01561386fc348" + ] }, { + "cell_type": "code", + "execution_count": 2, + "id": "8e0e3b7a96ca1d44", "metadata": { "ExecuteTime": { "end_time": "2025-01-02T05:00:16.514305Z", "start_time": "2025-01-02T05:00:16.101379Z" } }, - "cell_type": "code", + "outputs": [ + { + "data": { + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "c\n", + "\n", + "c\n", + "str\n", + "\n", + "\n", + "\n", + "e\n", + "\n", + "e\n", + "str\n", + "\n", + "\n", + "\n", + "c->e\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x\n", + "\n", + "x\n", + "str\n", + "\n", + "\n", + "\n", + "s\n", + "\n", + "s\n", + "str\n", + "\n", + "\n", + "\n", + "x->s\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "a\n", + "\n", + "a\n", + "str\n", + "\n", + "\n", + "\n", + "a->c\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "e->s\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "b\n", + "\n", + "b\n", + "str\n", + "\n", + "\n", + "\n", + "b->c\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "z\n", + "\n", + "z\n", 
+ "str\n", + "\n", + "\n", + "\n", + "z->x\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "d\n", + "\n", + "d\n", + "str\n", + "\n", + "\n", + "\n", + "d->e\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "y\n", + "\n", + "y\n", + "str\n", + "\n", + "\n", + "\n", + "y->x\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ "%%cell_to_module my_functions --display\n", "\n", @@ -117,46 +270,165 @@ " time.sleep(3)\n", " return x + \" \" + e\n", "\n" - ], - "id": "8e0e3b7a96ca1d44", - "outputs": [ - { - "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nc\n\nc\nstr\n\n\n\ne\n\ne\nstr\n\n\n\nc->e\n\n\n\n\n\nx\n\nx\nstr\n\n\n\ns\n\ns\nstr\n\n\n\nx->s\n\n\n\n\n\na\n\na\nstr\n\n\n\na->c\n\n\n\n\n\ne->s\n\n\n\n\n\nb\n\nb\nstr\n\n\n\nb->c\n\n\n\n\n\nz\n\nz\nstr\n\n\n\nz->x\n\n\n\n\n\nd\n\nd\nstr\n\n\n\nd->e\n\n\n\n\n\ny\n\ny\nstr\n\n\n\ny->x\n\n\n\n\n\nfunction\n\nfunction\n\n\n\n", - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "execution_count": 2 + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "# Run it without the adapter", - "id": "60355598274f9b79" + "id": "60355598274f9b79", + "metadata": {}, + "source": [ + "# Run it without the adapter" + ] }, { + "cell_type": "code", + "execution_count": 6, + "id": "fcb0677daf5b4a31", "metadata": { "ExecuteTime": { "end_time": "2025-01-02T05:02:44.719265Z", "start_time": "2025-01-02T05:02:44.423066Z" } }, - "cell_type": "code", - "source": [ - "from hamilton import driver\n", - "dr = driver.Builder().with_modules(my_functions).build()\n", - "dr" - ], - "id": "fcb0677daf5b4a31", "outputs": [ { "data": { - "image/svg+xml": 
"\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nc\n\nc\nstr\n\n\n\ne\n\ne\nstr\n\n\n\nc->e\n\n\n\n\n\nx\n\nx\nstr\n\n\n\ns\n\ns\nstr\n\n\n\nx->s\n\n\n\n\n\na\n\na\nstr\n\n\n\na->c\n\n\n\n\n\ne->s\n\n\n\n\n\nb\n\nb\nstr\n\n\n\nb->c\n\n\n\n\n\nz\n\nz\nstr\n\n\n\nz->x\n\n\n\n\n\nd\n\nd\nstr\n\n\n\nd->e\n\n\n\n\n\ny\n\ny\nstr\n\n\n\ny->x\n\n\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "c\n", + "\n", + "c\n", + "str\n", + "\n", + "\n", + "\n", + "e\n", + "\n", + "e\n", + "str\n", + "\n", + "\n", + "\n", + "c->e\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x\n", + "\n", + "x\n", + "str\n", + "\n", + "\n", + "\n", + "s\n", + "\n", + "s\n", + "str\n", + "\n", + "\n", + "\n", + "x->s\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "a\n", + "\n", + "a\n", + "str\n", + "\n", + "\n", + "\n", + "a->c\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "e->s\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "b\n", + "\n", + "b\n", + "str\n", + "\n", + "\n", + "\n", + "b->c\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "z\n", + "\n", + "z\n", + "str\n", + "\n", + "\n", + "\n", + "z->x\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "d\n", + "\n", + "d\n", + "str\n", + "\n", + "\n", + "\n", + "d->e\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "y\n", + "\n", + "y\n", + "str\n", + "\n", + "\n", + "\n", + "y->x\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -166,24 +438,23 @@ "output_type": "execute_result" } ], - "execution_count": 6 + "source": [ + "from hamilton import driver\n", + "\n", + "dr = driver.Builder().with_modules(my_functions).build()\n", + "dr" + ] }, { + "cell_type": "code", + "execution_count": 7, + "id": "960f9f5d5f018b38", "metadata": { "ExecuteTime": { "end_time": "2025-01-02T05:03:18.620774Z", "start_time": "2025-01-02T05:02:51.536385Z" 
} }, - "cell_type": "code", - "source": [ - "start = time.time()\n", - "r = dr.execute([\"s\", \"x\", \"a\"])\n", - "print(\"got return from dr\")\n", - "print(r)\n", - "print(\"Time taken with\", time.time() - start)" - ], - "id": "960f9f5d5f018b38", "outputs": [ { "name": "stdout", @@ -204,35 +475,171 @@ ] } ], - "execution_count": 7 + "source": [ + "start = time.time()\n", + "r = dr.execute([\"s\", \"x\", \"a\"])\n", + "print(\"got return from dr\")\n", + "print(r)\n", + "print(\"Time taken with\", time.time() - start)" + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "# Run it with the adapter -- note the parallelism & time taken", - "id": "8a1d2b183b914034" + "id": "8a1d2b183b914034", + "metadata": {}, + "source": [ + "# Run it with the adapter -- note the parallelism & time taken" + ] }, { + "cell_type": "code", + "execution_count": 8, + "id": "63853f111ef28439", "metadata": { "ExecuteTime": { "end_time": "2025-01-02T05:03:18.904861Z", "start_time": "2025-01-02T05:03:18.632385Z" } }, - "cell_type": "code", - "source": [ - "from hamilton import driver\n", - "from hamilton.plugins import h_threadpool\n", - "\n", - "adapter = h_threadpool.FutureAdapter()\n", - "dr = driver.Builder().with_modules(my_functions).with_adapters(adapter).build()\n", - "dr" - ], - "id": "63853f111ef28439", "outputs": [ { "data": { - "image/svg+xml": "\n\n\n\n\n\n\n\ncluster__legend\n\nLegend\n\n\n\nc\n\nc\nstr\n\n\n\ne\n\ne\nstr\n\n\n\nc->e\n\n\n\n\n\nx\n\nx\nstr\n\n\n\ns\n\ns\nstr\n\n\n\nx->s\n\n\n\n\n\na\n\na\nstr\n\n\n\na->c\n\n\n\n\n\ne->s\n\n\n\n\n\nb\n\nb\nstr\n\n\n\nb->c\n\n\n\n\n\nz\n\nz\nstr\n\n\n\nz->x\n\n\n\n\n\nd\n\nd\nstr\n\n\n\nd->e\n\n\n\n\n\ny\n\ny\nstr\n\n\n\ny->x\n\n\n\n\n\nfunction\n\nfunction\n\n\n\n", + "image/svg+xml": [ + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "cluster__legend\n", + "\n", + "Legend\n", + "\n", + "\n", + "\n", + "c\n", + "\n", + "c\n", + "str\n", + "\n", + "\n", + "\n", + "e\n", + "\n", + "e\n", + "str\n", + 
"\n", + "\n", + "\n", + "c->e\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "x\n", + "\n", + "x\n", + "str\n", + "\n", + "\n", + "\n", + "s\n", + "\n", + "s\n", + "str\n", + "\n", + "\n", + "\n", + "x->s\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "a\n", + "\n", + "a\n", + "str\n", + "\n", + "\n", + "\n", + "a->c\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "e->s\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "b\n", + "\n", + "b\n", + "str\n", + "\n", + "\n", + "\n", + "b->c\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "z\n", + "\n", + "z\n", + "str\n", + "\n", + "\n", + "\n", + "z->x\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "d\n", + "\n", + "d\n", + "str\n", + "\n", + "\n", + "\n", + "d->e\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "y\n", + "\n", + "y\n", + "str\n", + "\n", + "\n", + "\n", + "y->x\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "function\n", + "\n", + "function\n", + "\n", + "\n", + "\n" + ], "text/plain": [ "" ] @@ -242,24 +649,25 @@ "output_type": "execute_result" } ], - "execution_count": 8 + "source": [ + "from hamilton import driver\n", + "from hamilton.plugins import h_threadpool\n", + "\n", + "adapter = h_threadpool.FutureAdapter()\n", + "dr = driver.Builder().with_modules(my_functions).with_adapters(adapter).build()\n", + "dr" + ] }, { + "cell_type": "code", + "execution_count": 9, + "id": "1bb057f4277705de", "metadata": { "ExecuteTime": { "end_time": "2025-01-02T05:03:30.949086Z", "start_time": "2025-01-02T05:03:18.925667Z" } }, - "cell_type": "code", - "source": [ - "start = time.time()\n", - "r = dr.execute([\"s\", \"x\", \"a\"])\n", - "print(\"got return from dr\")\n", - "print(r)\n", - "print(\"Time taken with\", time.time() - start)" - ], - "id": "1bb057f4277705de", "outputs": [ { "name": "stdout", @@ -280,25 +688,31 @@ ] } ], - "execution_count": 9 + "source": [ + "start = time.time()\n", + "r = dr.execute([\"s\", \"x\", \"a\"])\n", + "print(\"got return from dr\")\n", + "print(r)\n", + "print(\"Time taken with\", 
time.time() - start)" + ] }, { - "metadata": {}, "cell_type": "markdown", + "id": "56e9fb7445639984", + "metadata": {}, "source": [ "# 27 seconds vs 12 seconds\n", "\n", "With the adapter we see a significant improvement in time taken to execute the DAG. This is because the adapter is able to parallelize the execution." - ], - "id": "56e9fb7445639984" + ] }, { - "metadata": {}, "cell_type": "code", - "outputs": [], "execution_count": null, - "source": "", - "id": "e31132a4fd211887" + "id": "e31132a4fd211887", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/examples/parallelism/star_counting/notebook.ipynb b/examples/parallelism/star_counting/notebook.ipynb index 65f1a07ae..fd76279a1 100644 --- a/examples/parallelism/star_counting/notebook.ipynb +++ b/examples/parallelism/star_counting/notebook.ipynb @@ -16,7 +16,7 @@ "id": "3f73ed72", "metadata": {}, "source": [ - "# Counting stars on GitHub [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/star_counting/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/star_counting/notebook.ipynb)\n" + "# Counting stars on GitHub [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/parallelism/star_counting/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/parallelism/star_counting/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\parallelism\\star_counting\\notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\parallelism\\star_counting\\notebook.ipynb)\n" ] }, { @@ -35,12 +35,10 @@ } ], "source": [ - "import click\n", + "import functions\n", "\n", "from hamilton import driver\n", - "import functions\n", - "from hamilton.execution import executors\n", - "import pandas as pd" + "from hamilton.execution import executors" ] }, { @@ -51,11 +49,8 @@ "outputs": [], "source": [ "# TODO -- delete\n", - "github_api_key=\"...\"\n", - "repositories=[\n", - " 'dagworks-inc/hamilton',\n", - " 'stitchfix/hamilton'\n", - "]" + "github_api_key = \"...\"\n", + "repositories = [\"dagworks-inc/hamilton\", \"stitchfix/hamilton\"]" ] }, { @@ -65,11 +60,13 @@ "metadata": {}, "outputs": [], "source": [ - "dr = driver.Builder() \\\n", - " .enable_dynamic_execution(allow_experimental_mode=True) \\\n", - " .with_modules(functions) \\\n", - " .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=10)) \\\n", - " .build()" + "dr = (\n", + " driver.Builder()\n", + " .enable_dynamic_execution(allow_experimental_mode=True)\n", + " .with_modules(functions)\n", + " .with_remote_executor(executors.MultiThreadingExecutor(max_tasks=10))\n", + " .build()\n", + ")" ] }, { @@ -231,9 +228,11 @@ ], "source": [ "dr.visualize_execution(\n", - " ['final_count'], None, {}, inputs={\n", - " 'github_api_key': github_api_key,\n", - " 'repositories': list(repositories)})" + " [\"final_count\"],\n", + " None,\n", + " {},\n", + " inputs={\"github_api_key\": github_api_key, \"repositories\": list(repositories)},\n", + ")" ] }, { @@ -243,8 +242,10 @@ "metadata": {}, "outputs": [], "source": [ - "df = dr.execute(['final_count', 'unique_stargazers'], inputs={'github_api_key': github_api_key,\n", - " 'repositories': list(repositories)})['unique_stargazers']" + "df = dr.execute(\n", + " [\"final_count\", \"unique_stargazers\"],\n", + " inputs={\"github_api_key\": github_api_key, 
\"repositories\": list(repositories)},\n", + ")[\"unique_stargazers\"]" ] }, { @@ -275,8 +276,10 @@ } ], "source": [ - "df['to_sum'] = 1\n", - "df.set_index('starred_at').sort_index().cumsum()['to_sum'].plot(title=f\"unique across {','.join(repositories)}\")" + "df[\"to_sum\"] = 1\n", + "df.set_index(\"starred_at\").sort_index().cumsum()[\"to_sum\"].plot(\n", + " title=f\"unique across {','.join(repositories)}\"\n", + ")" ] }, { diff --git a/examples/people_data_labs/notebook.ipynb b/examples/people_data_labs/notebook.ipynb index 113d10ba0..1551fcab0 100644 --- a/examples/people_data_labs/notebook.ipynb +++ b/examples/people_data_labs/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# People Data Labs + Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/people_data_labs/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/people_data_labs/notebook.ipynb)\n", + "# People Data Labs + Hamilton [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/people_data_labs/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/people_data_labs/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\people_data_labs\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\people_data_labs\\notebook.ipynb)\n", + "\n", "\n", "This notebook will teach you how to use People Data Labs (PDL) [Company 
enrichment](https://docs.peopledatalabs.com/docs/company-enrichment-api) data along stock market data for financial analysis. We will introduce the Python library [Hamilon](https://hamilton.apache.org/?badge=latest) to help create data transformations.\n", "\n", @@ -37,10 +38,10 @@ "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "from hamilton import driver\n", "from IPython.display import display\n", "\n", + "from hamilton import driver\n", + "\n", "# Loads a \"jupyter magic\" that allows special notebook interactions\n", "%load_ext hamilton.plugins.jupyter_magic" ] @@ -232,11 +233,7 @@ "metadata": {}, "outputs": [], "source": [ - "hamilton_driver = (\n", - " driver.Builder()\n", - " .with_modules(data_preparation)\n", - " .build()\n", - ")" + "hamilton_driver = driver.Builder().with_modules(data_preparation).build()" ] }, { @@ -470,7 +467,7 @@ "# display the execution path and the result for `company_info`\n", "display(\n", " hamilton_driver.visualize_execution([\"company_info\"], inputs=inputs),\n", - " results[\"company_info\"].head(3)\n", + " results[\"company_info\"].head(3),\n", ")" ] }, @@ -1036,11 +1033,7 @@ } ], "source": [ - "analytics_driver = (\n", - " driver.Builder()\n", - " .with_modules(data_preparation, analytics)\n", - " .build()\n", - ")\n", + "analytics_driver = driver.Builder().with_modules(data_preparation, analytics).build()\n", "analytics_driver" ] }, @@ -1467,7 +1460,7 @@ "inputs = dict(\n", " pdl_file=\"pdl_data.json\",\n", " stock_file=\"stock_data.json\",\n", - " rounds_selection=[\"series_a\", \"series_b\", \"series_c\", \"series_d\"]\n", + " rounds_selection=[\"series_a\", \"series_b\", \"series_c\", \"series_d\"],\n", ")\n", "\n", "final_vars = [\n", diff --git a/examples/plotly/notebook.ipynb b/examples/plotly/notebook.ipynb index 8d833727f..b2aab1491 100644 --- a/examples/plotly/notebook.ipynb +++ b/examples/plotly/notebook.ipynb @@ -16,7 +16,7 @@ "id": "4a44c333", "metadata": {}, "source": [ - "# Hamilton + 
Plotly integration [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/plotly/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/plotly/notebook.ipynb)\n" + "# Hamilton + Plotly integration [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/plotly/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/plotly/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\plotly\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\plotly\\notebook.ipynb)\n" ] }, { @@ -60,16 +60,11 @@ "dag_config = {\n", " \"test_size_fraction\": 0.95,\n", " \"shuffle_train_test_split\": True,\n", - " \"data_loader\" : \"digits\",\n", - " \"clf\" : \"svm\",\n", - " \"penalty\" : \"l2\"\n", + " \"data_loader\": \"digits\",\n", + " \"clf\": \"svm\",\n", + " \"penalty\": \"l2\",\n", "}\n", - "dr = (\n", - " driver.Builder()\n", - " .with_config(dag_config)\n", - " .with_modules(model_training)\n", - " .build()\n", - ")" + "dr = driver.Builder().with_config(dag_config).with_modules(model_training).build()" ] }, { @@ -440,17 +435,17 @@ ], "source": [ "materializers = [\n", - " to.plotly(\n", - " dependencies=[\"confusion_matrix_figure\"],\n", - " id=\"confusion_matrix_png\",\n", - " path=\"./static.png\",\n", - " ),\n", - " to.html(\n", - " dependencies=[\"confusion_matrix_figure\"],\n", - " id=\"confusion_matrix_html\",\n", - " path=\"./interactive.html\",\n", - 
" ),\n", - " ]\n", + " to.plotly(\n", + " dependencies=[\"confusion_matrix_figure\"],\n", + " id=\"confusion_matrix_png\",\n", + " path=\"./static.png\",\n", + " ),\n", + " to.html(\n", + " dependencies=[\"confusion_matrix_figure\"],\n", + " id=\"confusion_matrix_html\",\n", + " path=\"./interactive.html\",\n", + " ),\n", + "]\n", "\n", "dr.visualize_materialization(*materializers)" ] diff --git a/examples/polars/materialization/notebook.ipynb b/examples/polars/materialization/notebook.ipynb index cec55598e..b500191a0 100644 --- a/examples/polars/materialization/notebook.ipynb +++ b/examples/polars/materialization/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Polars integration tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/polars/materialization/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/polars/materialization/notebook.ipynb)\n" + "# Polars integration tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/polars/materialization/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/polars/materialization/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\polars\\materialization\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\polars\\materialization\\notebook.ipynb)\n" ] }, { @@ -38,11 +38,10 @@ ], "source": [ "import polars as pl\n", - "import 
sys\n", + "\n", "# Add the hamilton module to your path - optinal\n", "# project_dir = \"### ADD PATH HERE ###\"\n", "# sys.path.append(project_dir)\n", - "\n", "from hamilton import driver\n", "from hamilton.io.materialization import to\n", "from hamilton.plugins import h_polars" @@ -763,7 +762,7 @@ " if_table_exists=\"append\",\n", " combine=df_builder,\n", " ),\n", - " #materialize the dataframe to a spreadsheet file\n", + " # materialize the dataframe to a spreadsheet file\n", " to.spreadsheet(\n", " dependencies=output_columns,\n", " id=\"df_to_spreadsheet\",\n", diff --git a/examples/polars/notebook.ipynb b/examples/polars/notebook.ipynb index b36ec9e99..f2a4daa2c 100644 --- a/examples/polars/notebook.ipynb +++ b/examples/polars/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Polars materialization [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/polars/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/polars/notebook.ipynb)\n" + "# Polars materialization [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/polars/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/polars/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\polars\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\polars\\notebook.ipynb)\n" ] }, { @@ -312,19 +312,14 @@ }, "outputs": [], "source": [ - "from 
hamilton import driver, base\n", + "from hamilton import base, driver\n", "from hamilton.plugins import h_polars\n", "\n", "inputs = {\n", " \"base_df_location\": \"dummy_value\",\n", "}\n", "adapter = base.SimplePythonGraphAdapter(result_builder=h_polars.PolarsDataFrameResult())\n", - "dr = (driver\n", - " .Builder()\n", - " .with_modules(spend_calculations)\n", - " .with_adapters(adapter)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(spend_calculations).with_adapters(adapter).build()\n", "\n", "output_columns = [\n", " \"spend\",\n", diff --git a/examples/polars/with_columns/notebook.ipynb b/examples/polars/with_columns/notebook.ipynb index 75174e6de..704bfd342 100644 --- a/examples/polars/with_columns/notebook.ipynb +++ b/examples/polars/with_columns/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example of using with_columns for Polars DataFrame [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/polars/with_columns/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/polars/with_columns/notebook.ipynb)\n", + "# Example of using with_columns for Polars DataFrame [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/polars/with_columns/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/polars/with_columns/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\polars\\with_columns\\notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\polars\\with_columns\\notebook.ipynb)\n", + "\n", "\n", "This allows you to efficiently run groups of map operations on a dataframe.\n", "Here's an example of calling it -- if you've seen `@subdag`, you should be familiar with the concepts." @@ -36,10 +37,11 @@ ], "source": [ "%reload_ext hamilton.plugins.jupyter_magic\n", - "from hamilton import driver\n", "import my_functions\n", "\n", - "my_builder = driver.Builder().with_modules(my_functions).with_config({\"case\":\"thousands\"})\n", + "from hamilton import driver\n", + "\n", + "my_builder = driver.Builder().with_modules(my_functions).with_config({\"case\": \"thousands\"})\n", "output_node = [\"final_df\"]" ] }, @@ -603,9 +605,15 @@ ], "source": [ "import with_columns_example\n", - "dr = driver.Builder().with_modules(my_functions, with_columns_example).with_config({\"case\":\"millions\"}).build()\n", + "\n", + "dr = (\n", + " driver.Builder()\n", + " .with_modules(my_functions, with_columns_example)\n", + " .with_config({\"case\": \"millions\"})\n", + " .build()\n", + ")\n", "print(dr.execute(final_vars=[\"final_df\"])[\"final_df\"])\n", - "dr.visualize_execution(final_vars=[\"final_df\"])\n" + "dr.visualize_execution(final_vars=[\"final_df\"])" ] }, { @@ -625,10 +633,13 @@ "outputs": [], "source": [ "%reload_ext hamilton.plugins.jupyter_magic\n", - "from hamilton import driver\n", "import my_functions_lazy\n", "\n", - "my_builder_lazy = driver.Builder().with_modules(my_functions_lazy).with_config({\"case\":\"thousands\"})\n", + "from hamilton import driver\n", + "\n", + "my_builder_lazy = (\n", + " driver.Builder().with_modules(my_functions_lazy).with_config({\"case\": \"thousands\"})\n", + ")\n", "output_node = [\"final_df\"]" ] }, @@ -1192,19 +1203,21 @@ ], "source": [ "import with_columns_lazy_example\n", + "\n", "from hamilton import base\n", "from hamilton.plugins import 
h_polars\n", "\n", "dr = (\n", " driver.Builder()\n", " .with_adapter(\n", - " adapter=base.SimplePythonGraphAdapter(result_builder=h_polars.PolarsDataFrameResult()))\n", + " adapter=base.SimplePythonGraphAdapter(result_builder=h_polars.PolarsDataFrameResult())\n", + " )\n", " .with_modules(my_functions_lazy, with_columns_lazy_example)\n", - " .with_config({\"case\":\"millions\"})\n", + " .with_config({\"case\": \"millions\"})\n", " .build()\n", - " )\n", + ")\n", "print(dr.execute(final_vars=[\"final_df\"]))\n", - "dr.visualize_execution(final_vars=[\"final_df\"])\n" + "dr.visualize_execution(final_vars=[\"final_df\"])" ] }, { diff --git a/examples/prefect/hamilton_prefect_notebook.ipynb b/examples/prefect/hamilton_prefect_notebook.ipynb index d746927c1..6c5138261 100644 --- a/examples/prefect/hamilton_prefect_notebook.ipynb +++ b/examples/prefect/hamilton_prefect_notebook.ipynb @@ -16,7 +16,8 @@ "id": "7d635d0e", "metadata": {}, "source": [ - "# Hamilton + Prefect [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/prefect/hamilton_prefect_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/prefect/hamilton_prefect_notebook.ipynb)\n", + "# Hamilton + Prefect [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/prefect/hamilton_prefect_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/prefect/hamilton_prefect_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\prefect\\hamilton_prefect_notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\prefect\\hamilton_prefect_notebook.ipynb)\n", + "\n", "\n", "\n", "\n", @@ -105,6 +106,7 @@ "\n", "import pandas as pd\n", "from prefect import flow, task\n", + "\n", "from hamilton import base, driver" ] }, @@ -144,7 +146,7 @@ " final_vars=prepare_data.ALL_FEATURES + [label],\n", " inputs={\"raw_df\": raw_df},\n", " )\n", - " \n", + "\n", " # uncomment these lines to produce a local DAG visualization file:\n", " # dr.visualize_execution(\n", " # final_vars=prepare_data.ALL_FEATURES + [label],\n", diff --git a/examples/ray/hello_world/business_logic.py b/examples/ray/hello_world/business_logic.py index b4f559c0d..8255669a3 120000 --- a/examples/ray/hello_world/business_logic.py +++ b/examples/ray/hello_world/business_logic.py @@ -1 +1 @@ -../../hello_world/my_functions.py \ No newline at end of file +../../hello_world/my_functions.py diff --git a/examples/ray/hello_world/notebook.ipynb b/examples/ray/hello_world/notebook.ipynb index 01a4fcb50..50aa8c90f 100644 --- a/examples/ray/hello_world/notebook.ipynb +++ b/examples/ray/hello_world/notebook.ipynb @@ -16,7 +16,7 @@ "id": "97065bbc", "metadata": {}, "source": [ - "# Ray + Hamilton tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/ray/hello_world/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/ray/hello_world/notebook.ipynb)\n" + "# Ray + Hamilton tutorial [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/ray/hello_world/notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/ray/hello_world/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\ray\\hello_world\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\ray\\hello_world\\notebook.ipynb)\n" ] }, { @@ -64,7 +64,6 @@ "outputs": [], "source": [ "import ray\n", - "import pandas as pd\n", "\n", "from hamilton import base, driver\n", "from hamilton.plugins import h_ray" @@ -161,11 +160,11 @@ "# Set up the driver, input and output columns\n", "\n", "config = { # could load data here via some other means, or delegate to a module as we have done.\n", - " # 'signups': pd.Series([1, 10, 50, 100, 200, 400]),\n", - " \"signups_location\": \"some_path\",\n", - " # 'spend': pd.Series([10, 10, 20, 40, 40, 50]),\n", - " \"spend_location\": \"some_other_path\",\n", - " }\n", + " # 'signups': pd.Series([1, 10, 50, 100, 200, 400]),\n", + " \"signups_location\": \"some_path\",\n", + " # 'spend': pd.Series([10, 10, 20, 40, 40, 50]),\n", + " \"spend_location\": \"some_other_path\",\n", + "}\n", "adapter = h_ray.RayGraphAdapter(result_builder=base.PandasDataFrameResult())\n", "dr = driver.Driver(config, spend_calculations, adapter=adapter)\n", "output_columns = [\n", diff --git a/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb b/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb index b061ff3c6..31968e79a 100644 --- a/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb +++ b/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Hamilton UI Adapter [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb)\n", + "# Hamilton UI Adapter [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/ray/ray_Hamilton_UI_tracking/hamilton_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\ray\\ray_Hamilton_UI_tracking\\hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\ray\\ray_Hamilton_UI_tracking\\hamilton_notebook.ipynb)\n", + "\n", "\n", "\n", "Needs a running instance of Hamilton UI: https://hamilton.apache.org/concepts/ui/" @@ -33,15 +34,16 @@ "username = \"admin\"\n", "\n", "tracker_ray = HamiltonTracker(\n", - " project_id=project_id,\n", - " username=username,\n", - " dag_name=\"telemetry_with_ray\",)\n", + " project_id=project_id,\n", + " username=username,\n", + " dag_name=\"telemetry_with_ray\",\n", + ")\n", "\n", "tracker_without_ray = HamiltonTracker(\n", - " project_id=project_id,\n", - " username=username,\n", - " dag_name=\"telemetry_without_ray\",\n", - " )" + " project_id=project_id,\n", + " username=username,\n", + " dag_name=\"telemetry_without_ray\",\n", + ")" ] }, { @@ -78,8 +80,9 @@ "metadata": {}, "outputs": [], "source": [ - "from 
hamilton import driver\n", - "import ray_lineage" + "import ray_lineage\n", + "\n", + "from hamilton import driver" ] }, { @@ -104,7 +107,7 @@ "finally:\n", " dr_without_ray = driver.Builder().with_modules(ray_lineage).with_adapters(tracker).build()\n", " result_without_ray = dr_without_ray.execute(final_vars=[\"node_5s\", \"add_1_to_previous\"])\n", - " print(result_without_ray) \n" + " print(result_without_ray)" ] } ], diff --git a/examples/reusing_functions/reusing_functions.ipynb b/examples/reusing_functions/reusing_functions.ipynb index 24c05424d..9279b27c6 100644 --- a/examples/reusing_functions/reusing_functions.ipynb +++ b/examples/reusing_functions/reusing_functions.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Resusing Functions Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/reusing_functions/reusing_functions.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/reusing_functions/reusing_functions.ipynb)\n", + "# Resusing Functions Example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/reusing_functions/reusing_functions.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/reusing_functions/reusing_functions.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\reusing_functions\\reusing_functions.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\reusing_functions\\reusing_functions.ipynb)\n", + "\n", 
"\n", "\n", "This notebook demonstrates the use of the subdag operator.\n", diff --git a/examples/reverse_etl/notebook.ipynb b/examples/reverse_etl/notebook.ipynb index e5c48810b..e4e57e7df 100644 --- a/examples/reverse_etl/notebook.ipynb +++ b/examples/reverse_etl/notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Reverse ETL for your timesheets [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/reverse_etl/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/reverse_etl/notebook.ipynb)\n", + "# Reverse ETL for your timesheets [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/reverse_etl/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/reverse_etl/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\reverse_etl\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\reverse_etl\\notebook.ipynb)\n", + "\n", "\n", "\n", "This notebook contains a tutorial on how to use Hamilton to extract time tracking data from Timewarrior, transform it into a timesheet, and upload it to Google Sheets. You'll learn how to create dataflows in a notebook with Hamilton, and how to push data online using the Google Sheets API. The resulting dataflow can also be used as a Timewarrior extension to upload your timesheet using `timew report upload_timesheet.py`. See the [README](./README.md) for details! 
" @@ -407,12 +408,7 @@ "source": [ "from hamilton import driver\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(timesheet_module)\n", - " .with_config(dict(mode=\"demo\"))\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(timesheet_module).with_config(dict(mode=\"demo\")).build()\n", "results = dr.execute([\"timesheet\"])\n", "\n", "results[\"timesheet\"].head()" @@ -1235,7 +1231,7 @@ "inputs = dict(\n", " spreadsheet_id=..., # add your spreadsheet_id\n", " sheet_id=\"Sheet1\", # default name in a new spreadsheet\n", - " credentials=..., # add your credentials\n", + " credentials=..., # add your credentials\n", ")\n", "results = dr.execute([\"insert_records_query\", \"format_spreadsheet_query\"], inputs=inputs)\n", "\n", diff --git a/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb b/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb index 8a2d37f33..6333fe441 100644 --- a/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb +++ b/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Creating the individua modules within Jupyter [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb)\n", + "# Creating the individua modules within Jupyter [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/scikit-learn/species_distribution_modeling/hamilton_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\scikit-learn\\species_distribution_modeling\\hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\scikit-learn\\species_distribution_modeling\\hamilton_notebook.ipynb)\n", + "\n", "\n", "\n", "Add `--display --write_to_file` to visualize and write to the same named .py file. This will override existing files!" @@ -30,7 +31,7 @@ "%load_ext autoreload\n", "# set it to only reload the modules imported\n", "%autoreload 1\n", - "%reload_ext hamilton.plugins.jupyter_magic\n" + "%reload_ext hamilton.plugins.jupyter_magic" ] }, { @@ -889,11 +890,20 @@ } ], "source": [ + "import grids\n", + "import load_data\n", + "import postprocessing_results\n", + "import preprocessing\n", + "import train_and_predict\n", + "\n", "from hamilton import driver\n", - "import grids, load_data, postprocessing_results, preprocessing, train_and_predict\n", "\n", - "dr = driver.Builder().with_modules(grids, load_data, postprocessing_results, preprocessing, train_and_predict).build()\n", - "dr.visualize_execution(inputs={\"chosen_species\": \"aaa\"}, final_vars = [\"plot_species_distribution\"])" + "dr = (\n", + " driver.Builder()\n", + " .with_modules(grids, load_data, postprocessing_results, preprocessing, train_and_predict)\n", + " .build()\n", + ")\n", + "dr.visualize_execution(inputs={\"chosen_species\": \"aaa\"}, final_vars=[\"plot_species_distribution\"])" ] }, { @@ -937,16 +947,16 @@ "import matplotlib.pyplot as plt\n", "from run import plot_helper\n", "\n", - "species=(\"bradypus_variegatus_0\", \"microryzomys_minutus_0\")\n", 
+ "species = (\"bradypus_variegatus_0\", \"microryzomys_minutus_0\")\n", "for i, name in enumerate(species):\n", " print(\"_\" * 80)\n", " print(\"Modeling distribution of species '%s'\" % name)\n", " inputs = {\"chosen_species\": name}\n", " final_vars = [\"plot_species_distribution\"]\n", - " results = dr.execute(inputs=inputs,final_vars=final_vars)[final_vars[0]]\n", - " plot_helper(i=i,**results)\n", - " \n", - "plt.show()\n" + " results = dr.execute(inputs=inputs, final_vars=final_vars)[final_vars[0]]\n", + " plot_helper(i=i, **results)\n", + "\n", + "plt.show()" ] }, { @@ -1269,39 +1279,47 @@ } ], "source": [ + "import grids\n", + "import load_data\n", + "import postprocessing_results\n", + "import preprocessing\n", + "import train_and_predict\n", + "import train_and_predict_using_mutate\n", + "\n", "from hamilton import driver\n", - "import grids, load_data, postprocessing_results, preprocessing, train_and_predict, train_and_predict_using_mutate\n", "\n", "dr = (\n", " driver.Builder()\n", " .with_modules(\n", - " grids, \n", - " load_data, \n", - " postprocessing_results, \n", - " preprocessing, \n", - " train_and_predict, \n", - " train_and_predict_using_mutate\n", - " )\n", + " grids,\n", + " load_data,\n", + " postprocessing_results,\n", + " preprocessing,\n", + " train_and_predict,\n", + " train_and_predict_using_mutate,\n", + " )\n", " .allow_module_overrides()\n", " .build()\n", - " )\n", + ")\n", "\n", - "print(f\"{dr.list_available_variables()[-3].originating_functions} is from module {dr.list_available_variables()[-3].originating_functions[0].__module__}\")\n", + "print(\n", + " f\"{dr.list_available_variables()[-3].originating_functions} is from module {dr.list_available_variables()[-3].originating_functions[0].__module__}\"\n", + ")\n", "# dr.visualize_execution(inputs={\"chosen_species\": \"aaa\"}, final_vars = [\"plot_species_distribution\"])\n", "\n", "import matplotlib.pyplot as plt\n", "from run import plot_helper\n", "\n", - 
"species=(\"bradypus_variegatus_0\", \"microryzomys_minutus_0\")\n", + "species = (\"bradypus_variegatus_0\", \"microryzomys_minutus_0\")\n", "for i, name in enumerate(species):\n", " print(\"_\" * 80)\n", " print(\"Modeling distribution of species '%s'\" % name)\n", " inputs = {\"chosen_species\": name}\n", " final_vars = [\"plot_species_distribution\"]\n", - " results = dr.execute(inputs=inputs,final_vars=final_vars)[final_vars[0]]\n", - " plot_helper(i=i,**results)\n", - " \n", - "plt.show()\n" + " results = dr.execute(inputs=inputs, final_vars=final_vars)[final_vars[0]]\n", + " plot_helper(i=i, **results)\n", + "\n", + "plt.show()" ] }, { @@ -1866,7 +1884,7 @@ } ], "source": [ - "dr.visualize_execution(inputs=inputs,final_vars=final_vars)" + "dr.visualize_execution(inputs=inputs, final_vars=final_vars)" ] }, { diff --git a/examples/scikit-learn/species_distribution_modeling/original_script.py b/examples/scikit-learn/species_distribution_modeling/original_script.py index 9fb62b740..eb47069e6 100644 --- a/examples/scikit-learn/species_distribution_modeling/original_script.py +++ b/examples/scikit-learn/species_distribution_modeling/original_script.py @@ -76,7 +76,7 @@ def plot_species_distribution(species=("bradypus_variegatus_0", "microryzomys_mi Plot the species distribution. 
""" if len(species) > 2: - print("Note: when more than two species are provided," " only the first two will be used") + print("Note: when more than two species are provided, only the first two will be used") t0 = time() diff --git a/examples/scikit-learn/transformer/hamilton_notebook.ipynb b/examples/scikit-learn/transformer/hamilton_notebook.ipynb index 29922e904..c4a9135d4 100644 --- a/examples/scikit-learn/transformer/hamilton_notebook.ipynb +++ b/examples/scikit-learn/transformer/hamilton_notebook.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Scikit-learn transformer models [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/scikit-learn/transformer/hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/scikit-learn/transformer/hamilton_notebook.ipynb)\n", + "# Scikit-learn transformer models [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/scikit-learn/transformer/hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/scikit-learn/transformer/hamilton_notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\scikit-learn\\transformer\\hamilton_notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\scikit-learn\\transformer\\hamilton_notebook.ipynb)\n", + "\n", "\n", "\n", "Uncomment and run the cell below if you are in a Google Colab environment. 
It will:\n", @@ -78,9 +79,7 @@ "source": [ "from __future__ import annotations\n", "\n", - "import importlib\n", "import logging\n", - "import sys\n", "from types import ModuleType\n", "from typing import Any, Dict, List\n", "\n", @@ -91,7 +90,7 @@ "from sklearn.preprocessing import StandardScaler\n", "from sklearn.utils.validation import check_array, check_is_fitted\n", "\n", - "from hamilton import base, driver, log_setup, ad_hoc_utils\n", + "from hamilton import ad_hoc_utils, base, driver, log_setup\n", "\n", "logger = logging.getLogger(__name__)\n", "log_setup.setup_logging()" @@ -105,6 +104,7 @@ "source": [ "# We'll place the spend calculations into a new module\n", "\n", + "\n", "def avg_3wk_spend(spend: pd.Series) -> pd.Series:\n", " \"\"\"Rolling 3 week average spend.\"\"\"\n", " return spend.rolling(3).mean()\n", @@ -117,7 +117,7 @@ "\n", "spend_calculations = ad_hoc_utils.create_temporary_module(\n", " avg_3wk_spend, spend_per_signup, module_name=\"spend_calculations\"\n", - ")\n" + ")" ] }, { @@ -128,6 +128,7 @@ "source": [ "# We'll place the spend statistics calculations into a new module\n", "\n", + "\n", "def spend_mean(spend: pd.Series) -> float:\n", " \"\"\"Shows function creating a scalar. 
In this case it computes the mean of the entire column.\"\"\"\n", " return spend.mean()\n", @@ -149,8 +150,12 @@ "\n", "\n", "spend_statistics = ad_hoc_utils.create_temporary_module(\n", - " spend_mean, spend_zero_mean, spend_std_dev, spend_zero_mean_unit_variance, module_name=\"spend_statistics\"\n", - ")\n" + " spend_mean,\n", + " spend_zero_mean,\n", + " spend_std_dev,\n", + " spend_zero_mean_unit_variance,\n", + " module_name=\"spend_statistics\",\n", + ")" ] }, { @@ -264,7 +269,7 @@ " :param X: Input 2D array\n", " :return: Hamilton Driver output 2D array\n", " \"\"\"\n", - " return self.fit(X, **fit_params).transform(X)\n" + " return self.fit(X, **fit_params).transform(X)" ] }, { @@ -295,7 +300,7 @@ "]\n", "\n", "\n", - "dr = driver.Driver({}, spend_calculations,spend_statistics)\n" + "dr = driver.Driver({}, spend_calculations, spend_statistics)" ] }, { @@ -452,11 +457,11 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "hamilton_df = dr.execute(final_vars=output_columns, inputs=initial_df.to_dict(orient=\"series\"))\n", "\n", "custom_transformer = HamiltonTransformer(\n", - " config={}, modules=[spend_calculations, spend_statistics], final_vars=output_columns)\n", + " config={}, modules=[spend_calculations, spend_statistics], final_vars=output_columns\n", + ")\n", "sklearn_df = custom_transformer.fit_transform(initial_df)\n", "\n", "try:\n", @@ -464,7 +469,7 @@ "\n", "except ValueError as e:\n", " logger.warning(\"Check 1 failed; `sklearn_df` and `hamilton_df` are unequal\")\n", - " raise e\n" + " raise e" ] }, { @@ -497,7 +502,7 @@ " logger.warning(\n", " \"Check 2 failed; `pipe_custom_then_sklearn` and `hamilton_then_sklearn` are unequal\"\n", " )\n", - " raise e\n" + " raise e" ] }, { @@ -515,7 +520,6 @@ "metadata": {}, "outputs": [], "source": [ - "\n", "scaler = StandardScaler().set_output(transform=\"pandas\")\n", "\n", "scaled_df = scaler.fit_transform(initial_df)\n", @@ -537,7 +541,7 @@ " )\n", " raise e\n", "\n", - "logger.info(\"All checks 
passed. `HamiltonTransformer` behaves properly\")\n" + "logger.info(\"All checks passed. `HamiltonTransformer` behaves properly\")" ] }, { diff --git a/examples/slack/notebook.ipynb b/examples/slack/notebook.ipynb index ff4d26199..2c3fc7f19 100644 --- a/examples/slack/notebook.ipynb +++ b/examples/slack/notebook.ipynb @@ -16,7 +16,7 @@ "id": "5efeed20", "metadata": {}, "source": [ - "# Slack notifier [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/slack/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/slack/notebook.ipynb)\n" + "# Slack notifier [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/slack/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/slack/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\slack\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\slack\\notebook.ipynb)\n" ] }, { @@ -28,8 +28,6 @@ }, "outputs": [], "source": [ - "import pandas as pd\n", - "\n", "from hamilton import driver\n", "from hamilton.plugins.h_slack import SlackNotifier\n", "\n", diff --git a/examples/spark/pandas_on_spark/business_logic.py b/examples/spark/pandas_on_spark/business_logic.py index b4f559c0d..8255669a3 120000 --- a/examples/spark/pandas_on_spark/business_logic.py +++ b/examples/spark/pandas_on_spark/business_logic.py @@ -1 +1 @@ -../../hello_world/my_functions.py \ No newline at end of file 
+../../hello_world/my_functions.py diff --git a/examples/spark/pyspark/notebook.ipynb b/examples/spark/pyspark/notebook.ipynb index 2b2264a9d..a504d531b 100644 --- a/examples/spark/pyspark/notebook.ipynb +++ b/examples/spark/pyspark/notebook.ipynb @@ -16,7 +16,7 @@ "id": "66d615ca", "metadata": {}, "source": [ - "# PySpark example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/pyspark/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/pyspark/notebook.ipynb)\n" + "# PySpark example [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/pyspark/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/pyspark/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\spark\\pyspark\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\spark\\pyspark\\notebook.ipynb)\n" ] }, { @@ -26,9 +26,9 @@ "metadata": {}, "outputs": [], "source": [ - "import pyspark.sql as ps\n", "import pandas as pd\n", - "from pyspark.sql.functions import col, mean, stddev" + "import pyspark.sql as ps\n", + "from pyspark.sql.functions import col, mean" ] }, { @@ -49,53 +49,53 @@ "outputs": [], "source": [ "pd_df = pd.DataFrame(\n", - " {\n", - " \"spend\": [\n", - " 10,\n", - " 10,\n", - " 20,\n", - " 40,\n", - " 40,\n", - " 50,\n", - " 60,\n", - " 70,\n", - " 90,\n", - " 100,\n", - " 70,\n", - " 80,\n", - " 90,\n", - " 100,\n", - " 110,\n", 
- " 120,\n", - " 130,\n", - " 140,\n", - " 150,\n", - " 160,\n", - " ],\n", - " \"signups\": [\n", - " 1,\n", - " 10,\n", - " 50,\n", - " 100,\n", - " 200,\n", - " 400,\n", - " 600,\n", - " 800,\n", - " 1000,\n", - " 1200,\n", - " 1400,\n", - " 1600,\n", - " 1800,\n", - " 2000,\n", - " 2200,\n", - " 2400,\n", - " 2600,\n", - " 2800,\n", - " 3000,\n", - " 3200,\n", - " ],\n", - " }\n", - " )\n", + " {\n", + " \"spend\": [\n", + " 10,\n", + " 10,\n", + " 20,\n", + " 40,\n", + " 40,\n", + " 50,\n", + " 60,\n", + " 70,\n", + " 90,\n", + " 100,\n", + " 70,\n", + " 80,\n", + " 90,\n", + " 100,\n", + " 110,\n", + " 120,\n", + " 130,\n", + " 140,\n", + " 150,\n", + " 160,\n", + " ],\n", + " \"signups\": [\n", + " 1,\n", + " 10,\n", + " 50,\n", + " 100,\n", + " 200,\n", + " 400,\n", + " 600,\n", + " 800,\n", + " 1000,\n", + " 1200,\n", + " 1400,\n", + " 1600,\n", + " 1800,\n", + " 2000,\n", + " 2200,\n", + " 2400,\n", + " 2600,\n", + " 2800,\n", + " 3000,\n", + " 3200,\n", + " ],\n", + " }\n", + ")\n", "ps_df = spark_session.createDataFrame(pd_df)" ] }, @@ -138,7 +138,7 @@ } ], "source": [ - "ps_df.withColumn(\"foo\", ps_df['signups']*ps_df['spend'])" + "ps_df.withColumn(\"foo\", ps_df[\"signups\"] * ps_df[\"spend\"])" ] }, { diff --git a/examples/spark/pyspark_feature_catalog/example_usage.ipynb b/examples/spark/pyspark_feature_catalog/example_usage.ipynb index 8c1b72a4b..392204de5 100644 --- a/examples/spark/pyspark_feature_catalog/example_usage.ipynb +++ b/examples/spark/pyspark_feature_catalog/example_usage.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Feature catalog example for spark [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/pyspark_feature_catalog/example_usage.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/pyspark_feature_catalog/example_usage.ipynb)\n", + "# Feature catalog example for spark [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/pyspark_feature_catalog/example_usage.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/pyspark_feature_catalog/example_usage.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\spark\\pyspark_feature_catalog\\example_usage.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\spark\\pyspark_feature_catalog\\example_usage.ipynb)\n", + "\n", "\n", "\n", "Define multiple features with clear lineage using Hamilton.\n", @@ -61,6 +62,7 @@ ], "source": [ "import findspark\n", + "\n", "findspark.init()\n", "\n", "import importlib\n", @@ -70,8 +72,7 @@ "\n", "import pyspark.sql as ps\n", "\n", - "from hamilton import base\n", - "from hamilton import driver\n", + "from hamilton import base, driver\n", "\n", "logging.basicConfig(stream=sys.stdout)\n", "\n", @@ -93,13 +94,14 @@ "outputs": [], "source": [ "class MyCustomBuilder(base.ResultMixin):\n", - " @staticmethod\n", - " def build_result(**outputs: typing.Dict[str, typing.Any]) -> ps.DataFrame:\n", + " @staticmethod\n", + " def build_result(**outputs: typing.Dict[str, typing.Any]) -> ps.DataFrame:\n", " # TODO: add error handling when incompatible outputs are created\n", - " level_info = outputs['level_info']\n", - " zone_counts = outputs['zone_counts']\n", + " level_info = outputs[\"level_info\"]\n", + " zone_counts = 
outputs[\"zone_counts\"]\n", " return zone_counts.join(level_info, on=aggregation_level)\n", "\n", + "\n", "adapter = base.SimplePythonGraphAdapter(MyCustomBuilder)" ] }, diff --git a/examples/spark/scraping_and_chunking b/examples/spark/scraping_and_chunking index 4bf9913cf..acf91713f 120000 --- a/examples/spark/scraping_and_chunking +++ b/examples/spark/scraping_and_chunking @@ -1 +1 @@ -/Users/stefankrawczyk/dagworks/hamilton/examples/LLM_Workflows/scraping_and_chunking/spark \ No newline at end of file +/Users/stefankrawczyk/dagworks/hamilton/examples/LLM_Workflows/scraping_and_chunking/spark diff --git a/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb b/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb index 5e5e1ee67..c205e4044 100644 --- a/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb +++ b/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example notebook - pandas [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb)\n" + "# Example notebook - pandas [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__pandas.ipynb) [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\spark\\world_of_warcraft\\world_of_warcraft__pandas.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\spark\\world_of_warcraft\\world_of_warcraft__pandas.ipynb)\n" ] }, { @@ -38,14 +38,13 @@ ], "source": [ "# Cell 1 - import the things you need\n", + "import importlib\n", "import logging\n", "import sys\n", - "import importlib\n", "\n", - "import numpy as np\n", "import pandas as pd\n", "\n", - "from hamilton import ad_hoc_utils, driver\n", + "from hamilton import driver\n", "\n", "logging.basicConfig(stream=sys.stdout)" ] @@ -71,7 +70,7 @@ "# import the jupyter extension\n", "%load_ext autoreload\n", "# set it to only reload the modules imported\n", - "%autoreload 1\n" + "%autoreload 1" ] }, { @@ -315,7 +314,10 @@ "\n", "# let's create the dataframe!\n", "# if you only did `pip install sf-hamilton` earlier:\n", - "df = dr.execute(output_columns, inputs={**input_df.to_dict(orient='series'), \"aggregation_level\": aggregation_level})\n", + "df = dr.execute(\n", + " output_columns,\n", + " inputs={**input_df.to_dict(orient=\"series\"), \"aggregation_level\": aggregation_level},\n", + ")\n", "# else if you did `pip install \"sf-hamilton[visualization]\"` earlier:\n", "# dr.visualize_execution(output_columns, './my-dag.dot', {})\n", "print(df)" diff --git a/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb b/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb index 5ef0821cc..f52745629 100644 --- a/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb +++ b/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example notebook [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb)\n", + "# Example notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v1.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\spark\\world_of_warcraft\\world_of_warcraft__spark_v1.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\spark\\world_of_warcraft\\world_of_warcraft__spark_v1.ipynb)\n", + "\n", "\n", "\n", "This notebooks shows how you can create multiple features that build/depend upon each other, using spark." 
@@ -41,12 +42,9 @@ ], "source": [ "# Cell 1 - import the things you need\n", + "import importlib\n", "import logging\n", "import sys\n", - "import importlib\n", - "\n", - "import pyspark.sql as ps\n", - "import pandas as pd\n", "\n", "from hamilton import driver\n", "\n", @@ -74,7 +72,7 @@ "# import the jupyter extension\n", "%load_ext autoreload\n", "# set it to only reload the modules imported\n", - "%autoreload 1\n" + "%autoreload 1" ] }, { @@ -186,7 +184,7 @@ "zone_features = importlib.import_module(\"zone_features__spark_v1\")\n", "\n", "dr = driver.Driver({}, zone_features) # can pass in multiple modules\n", - "dr.display_all_functions(None)\n" + "dr.display_all_functions(None)" ] }, { @@ -392,7 +390,7 @@ "\n", "result = dr.execute([\"zone_likelihoods\"], inputs={\"aggregation_level\": aggregation_level})\n", "\n", - "result.iloc[0,0].toPandas()\n" + "result.iloc[0, 0].toPandas()" ] }, { diff --git a/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb b/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb index abc426320..97ca9ba9c 100644 --- a/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb +++ b/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb @@ -14,7 +14,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Example notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb)\n", + "# Example notebook [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb) [![GitHub 
badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/spark/world_of_warcraft/world_of_warcraft__spark_v2.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\spark\\world_of_warcraft\\world_of_warcraft__spark_v2.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\spark\\world_of_warcraft\\world_of_warcraft__spark_v2.ipynb)\n", + "\n", "\n", "\n", "This notebooks shows how you can create multiple features that build/depend upon each other, using spark." @@ -41,12 +42,9 @@ ], "source": [ "# Cell 1 - import the things you need\n", + "import importlib\n", "import logging\n", "import sys\n", - "import importlib\n", - "\n", - "import pyspark.sql as ps\n", - "import pandas as pd\n", "\n", "from hamilton import driver\n", "\n", @@ -74,7 +72,7 @@ "# import the jupyter extension\n", "%load_ext autoreload\n", "# set it to only reload the modules imported\n", - "%autoreload 1\n" + "%autoreload 1" ] }, { @@ -252,7 +250,7 @@ "zone_features = importlib.import_module(\"zone_features__spark_v2\")\n", "\n", "dr = driver.Driver({}, zone_features) # can pass in multiple modules\n", - "dr.display_all_functions(None)\n" + "dr.display_all_functions(None)" ] }, { @@ -457,7 +455,7 @@ "\n", "result = dr.execute([\"zone_likelihoods\"], inputs={\"aggregation_level\": aggregation_level})\n", "\n", - "result.iloc[0,0].toPandas()\n" + "result.iloc[0, 0].toPandas()" ] }, { diff --git a/examples/vaex/notebook.ipynb b/examples/vaex/notebook.ipynb index a375d2b65..d4b65c259 100644 --- a/examples/vaex/notebook.ipynb +++ b/examples/vaex/notebook.ipynb @@ -14,7 +14,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Vaex + Hamilton integration [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/vaex/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/vaex/notebook.ipynb)\n" + "# Vaex + Hamilton integration [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/vaex/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/vaex/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\vaex\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\vaex\\notebook.ipynb)\n" ] }, { @@ -35,7 +35,7 @@ ], "source": [ "from hamilton import base, driver\n", - "from hamilton.plugins import vaex_extensions, h_vaex" + "from hamilton.plugins import h_vaex" ] }, { diff --git a/examples/validation/static_validator/notebook.ipynb b/examples/validation/static_validator/notebook.ipynb index 9749db75f..6e992591d 100644 --- a/examples/validation/static_validator/notebook.ipynb +++ b/examples/validation/static_validator/notebook.ipynb @@ -16,7 +16,7 @@ "id": "9c98f181", "metadata": {}, "source": [ - "# Static validator dev [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/validation/static_validator/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/validation/static_validator/notebook.ipynb)\n" + "# Static 
validator dev [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples/validation/static_validator/notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples/validation/static_validator/notebook.ipynb) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/dagworks-inc/hamilton/blob/main/examples\\validation\\static_validator\\notebook.ipynb) [![GitHub badge](https://img.shields.io/badge/github-view_source-2b3137?logo=github)](https://github.com/apache/hamilton/blob/main/examples\\validation\\static_validator\\notebook.ipynb)\n" ] }, { @@ -167,15 +167,16 @@ "source": [ "# Validator\n", "from typing import Optional\n", - "from hamilton.lifecycle import api\n", + "\n", "from hamilton.graph_types import HamiltonNode\n", + "from hamilton.lifecycle import api\n", "\n", "\n", "class MyTagValidator(api.StaticValidator):\n", " \"\"\"Static validator that is run right after the graph is built.\"\"\"\n", "\n", " def run_to_validate_node(\n", - " self, *, node: HamiltonNode, **future_kwargs\n", + " self, *, node: HamiltonNode, **future_kwargs\n", " ) -> tuple[bool, Optional[str]]:\n", " if node.tags.get(\"node_type\", \"\") == \"output\":\n", " table_name = node.tags.get(\"table_name\")\n", @@ -184,6 +185,7 @@ " return False, error_msg\n", " return True, None\n", "\n", + "\n", "tag_validator = MyTagValidator()" ] }, @@ -212,12 +214,7 @@ "source": [ "from hamilton import driver\n", "\n", - "dr = (\n", - " driver.Builder()\n", - " .with_modules(good_module)\n", - " .with_adapters(tag_validator)\n", - " .build()\n", - ")\n", + "dr = driver.Builder().with_modules(good_module).with_adapters(tag_validator).build()\n", "dr.execute([good_module.foo])" ] }, @@ -261,12 +258,7 @@ ], "source": [ "# this should error\n", - "dr = 
(\n", - " driver.Builder()\n", - " .with_modules(bad_module)\n", - " .with_adapters(tag_validator)\n", - " .build()\n", - ")" + "dr = driver.Builder().with_modules(bad_module).with_adapters(tag_validator).build()" ] }, { diff --git a/hamilton/cli/__main__.py b/hamilton/cli/__main__.py index 41c411c6e..1b58f6da0 100644 --- a/hamilton/cli/__main__.py +++ b/hamilton/cli/__main__.py @@ -81,7 +81,7 @@ class CliState: typer.Option( "--context", "-ctx", - help="Path to Driver context file [.json, .py]", + help="Path to Driver context file [.json, .py, .toml]", exists=True, dir_okay=False, readable=True, diff --git a/hamilton/cli/logic.py b/hamilton/cli/logic.py index e5f0f563b..5ea8c72db 100644 --- a/hamilton/cli/logic.py +++ b/hamilton/cli/logic.py @@ -269,7 +269,6 @@ def visualize_diff( # TODO refactor ContextLoader to a class -# TODO support loading from pyproject.toml def load_context(file_path: Path) -> dict: if not file_path.exists(): raise FileNotFoundError(f"`{file_path}` doesn't exist.") @@ -279,6 +278,8 @@ def load_context(file_path: Path) -> dict: context = _read_json_context(file_path) elif extension == ".py": context = _read_py_context(file_path) + elif extension in [".toml", ".tml"]: + context = _read_toml_context(file_path) else: raise ValueError(f"Received extension `{extension}` is unsupported.") @@ -337,3 +338,43 @@ def _read_py_context(file_path: Path) -> dict: context[k] = getattr(module, k, None) return context + + +def _read_toml_context(file_path: Path) -> dict: + """Read context from a TOML file. For pyproject.toml, looks for Hamilton configuration in [tool.hamilton] section.""" + try: + import tomli # Using tomli for compatibility with older Python versions + except ImportError: + # Provide a helpful error message if tomli is not available + raise ImportError( + "tomli is required to read TOML files. " + "Install it with `pip install tomli` or `pip install sf-hamilton[cli]` which includes TOML support." 
+ ) from None + + with open(file_path, "rb") as f: + data = tomli.load(f) + + # First check if there's a [tool.hamilton] section in pyproject.toml + # This is where Hamilton-specific configuration would typically go + hamilton_config = data.get("tool", {}).get("hamilton", {}) + + # If we find Hamilton-specific config, use it as the context + if hamilton_config: + context = { + CONFIG_HEADER: hamilton_config.get("config", {}), + FINAL_VARS_HEADER: hamilton_config.get("final_vars", []), + INPUTS_HEADER: hamilton_config.get("inputs", {}), + OVERRIDES_HEADER: hamilton_config.get("overrides", {}), + } + else: + # Otherwise, check for top-level Hamilton context headers + context = {} + for k in [ + CONFIG_HEADER, + FINAL_VARS_HEADER, + INPUTS_HEADER, + OVERRIDES_HEADER, + ]: + context[k] = data.get(k, None) + + return context diff --git a/hamilton/data_quality/default_validators.py b/hamilton/data_quality/default_validators.py index f91f7ad17..c48376c49 100644 --- a/hamilton/data_quality/default_validators.py +++ b/hamilton/data_quality/default_validators.py @@ -224,7 +224,7 @@ def validate(self, data: pd.Series) -> base.ValidationResult: message=f"Out of {total_length} items in the series, {total_na} of them are Nan, " f"representing: {MaxFractionNansValidatorPandasSeries._to_percent(fraction_na)}. 
" f"Max allowable Nans is: {MaxFractionNansValidatorPandasSeries._to_percent(self.max_fraction_nans)}," - f' so this {"passes" if passes else "does not pass"}.', + f" so this {'passes' if passes else 'does not pass'}.", diagnostics={ "total_nan": total_na, "total_length": total_length, diff --git a/hamilton/function_modifiers/base.py b/hamilton/function_modifiers/base.py index a4081883c..0b538fc39 100644 --- a/hamilton/function_modifiers/base.py +++ b/hamilton/function_modifiers/base.py @@ -789,7 +789,7 @@ def _add_original_function_to_nodes(fn: Callable, nodes: List[node.Node]) -> Lis def _resolve_nodes_error(fn: Callable) -> str: - return f"Exception occurred while compiling function: {fn.__name__} " f"to nodes" + return f"Exception occurred while compiling function: {fn.__name__} to nodes" def resolve_nodes(fn: Callable, config: Dict[str, Any]) -> Collection[node.Node]: diff --git a/hamilton/function_modifiers/expanders.py b/hamilton/function_modifiers/expanders.py index 9e16a9451..acc4c5a6d 100644 --- a/hamilton/function_modifiers/expanders.py +++ b/hamilton/function_modifiers/expanders.py @@ -832,7 +832,7 @@ def _validate_extract_fields(fields: dict): if errors: raise base.InvalidDecoratorException( - f"Error, found these {errors}. " f"Please pass in a dict of string to types. " + f"Error, found these {errors}. Please pass in a dict of string to types. " ) diff --git a/hamilton/function_modifiers/macros.py b/hamilton/function_modifiers/macros.py index 507e561f4..f331e05dc 100644 --- a/hamilton/function_modifiers/macros.py +++ b/hamilton/function_modifiers/macros.py @@ -68,8 +68,7 @@ def ensure_function_empty(fn: Callable): _empty_function_with_docstring.__code__.co_code, }: raise base.InvalidDecoratorException( - f"Function: {fn.__name__} is not empty. Must have only one line that " - 'consists of "pass"' + f'Function: {fn.__name__} is not empty. 
Must have only one line that consists of "pass"' ) diff --git a/hamilton/graph.py b/hamilton/graph.py index f8cc32e4c..0b8fd4fdc 100644 --- a/hamilton/graph.py +++ b/hamilton/graph.py @@ -311,7 +311,7 @@ def _get_input_label(input_nodes: FrozenSet[node.Node]) -> str: name = dep.name type_string = get_type_as_string(dep.type) if get_type_as_string(dep.type) else "" rows.append(f"{name}{type_string}") - return f"<{''.join(rows)}
>" + return f'<{"".join(rows)}
>' def _get_node_type(n: node.Node) -> str: """Get the node type of a DAG node. diff --git a/hamilton/lifecycle/base.py b/hamilton/lifecycle/base.py index 19daa794a..1c881220f 100644 --- a/hamilton/lifecycle/base.py +++ b/hamilton/lifecycle/base.py @@ -175,8 +175,7 @@ def validate_validator_fn(fn: Callable): """ if inspect.iscoroutinefunction(fn): raise InvalidLifecycleAdapter( - f"Lifecycle validators must (so far) be synchronous, " - f"but {fn} is an async function. " + f"Lifecycle validators must (so far) be synchronous, but {fn} is an async function. " ) validate_lifecycle_adapter_function(fn, returns_value=True) diff --git a/hamilton/plugins/h_spark.py b/hamilton/plugins/h_spark.py index 3d0f7e33a..701fff3bc 100644 --- a/hamilton/plugins/h_spark.py +++ b/hamilton/plugins/h_spark.py @@ -642,7 +642,7 @@ def derive_dataframe_parameter( if requested_parameter is not None: if requested_parameter not in dataframe_parameters: raise ValueError( - f"Requested parameter {requested_parameter} not found in " f"{location_name}" + f"Requested parameter {requested_parameter} not found in {location_name}" ) return requested_parameter if len(dataframe_parameters) == 0: diff --git a/hamilton/telemetry.py b/hamilton/telemetry.py index f0822f84e..686dedb35 100644 --- a/hamilton/telemetry.py +++ b/hamilton/telemetry.py @@ -112,7 +112,7 @@ def _check_config_and_environ_for_telemetry_flag( telemetry_enabled = config_obj.getboolean("DEFAULT", "telemetry_enabled") except ValueError as e: logger.debug( - "Unable to parse value for `telemetry_enabled` from config. " f"Encountered {e}" + f"Unable to parse value for `telemetry_enabled` from config. 
Encountered {e}" ) if os.environ.get("HAMILTON_TELEMETRY_ENABLED") is not None: env_value = os.environ.get("HAMILTON_TELEMETRY_ENABLED") diff --git a/plugin_tests/h_dask/resources b/plugin_tests/h_dask/resources index 1e58ceb6d..50d707597 120000 --- a/plugin_tests/h_dask/resources +++ b/plugin_tests/h_dask/resources @@ -1 +1 @@ -../../tests/resources \ No newline at end of file +../../tests/resources diff --git a/plugin_tests/h_narwhals/resources b/plugin_tests/h_narwhals/resources index 1e58ceb6d..50d707597 120000 --- a/plugin_tests/h_narwhals/resources +++ b/plugin_tests/h_narwhals/resources @@ -1 +1 @@ -../../tests/resources \ No newline at end of file +../../tests/resources diff --git a/plugin_tests/h_ray/resources b/plugin_tests/h_ray/resources index 1e58ceb6d..50d707597 120000 --- a/plugin_tests/h_ray/resources +++ b/plugin_tests/h_ray/resources @@ -1 +1 @@ -../../tests/resources \ No newline at end of file +../../tests/resources diff --git a/plugin_tests/h_spark/resources b/plugin_tests/h_spark/resources index 1e58ceb6d..50d707597 120000 --- a/plugin_tests/h_spark/resources +++ b/plugin_tests/h_spark/resources @@ -1 +1 @@ -../../tests/resources \ No newline at end of file +../../tests/resources diff --git a/pyproject.toml b/pyproject.toml index 548fd2434..ba7e26243 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -53,7 +53,10 @@ dependencies = [ ] [project.optional-dependencies] -cli = ["typer"] +cli = [ + "typer", + "tomli", +] dask = ["dask[complete]"] # commonly you'll want everything. 
dask-array = ["dask[array]"] dask-dataframe = ["dask[dataframe]"] diff --git a/tests/cli/resources/test_context.toml b/tests/cli/resources/test_context.toml new file mode 100644 index 000000000..4997b2cca --- /dev/null +++ b/tests/cli/resources/test_context.toml @@ -0,0 +1,7 @@ +# Test TOML file for Hamilton CLI context loading + +# Define Hamilton headers as top-level values +HAMILTON_CONFIG = {test_param = "test_value"} +HAMILTON_FINAL_VARS = ["final_var1", "final_var2"] +HAMILTON_INPUTS = {input_value = 42} +HAMILTON_OVERRIDES = {override_value = "override"} diff --git a/tests/cli/resources/test_tool_hamilton.toml b/tests/cli/resources/test_tool_hamilton.toml new file mode 100644 index 000000000..1902542b5 --- /dev/null +++ b/tests/cli/resources/test_tool_hamilton.toml @@ -0,0 +1,8 @@ +# Test TOML file for Hamilton CLI context loading using [tool.hamilton] section + +[tool.hamilton] +# Hamilton-specific configuration +config = {test_param = "test_value"} +final_vars = ["final_var1", "final_var2"] +inputs = {input_value = 42, string_input = "test_string"} +overrides = {override_value = "override"} diff --git a/tests/cli/test_logic.py b/tests/cli/test_logic.py index 38bfc99a1..29d262178 100644 --- a/tests/cli/test_logic.py +++ b/tests/cli/test_logic.py @@ -93,3 +93,42 @@ def test_diff_node_versions(): assert diff["reference_only"] == ["orders_per_customer"] assert diff["current_only"] == ["orders_per_distributor"] assert diff["edit"] == ["average_order_by_customer", "customer_summary_table"] + + +def test_load_context_from_toml(monkeypatch): + """Test loading context from a TOML file with top-level Hamilton headers.""" + monkeypatch.setenv("HAMILTON_CONFIG", "HAMILTON_CONFIG") + monkeypatch.setenv("HAMILTON_FINAL_VARS", "HAMILTON_FINAL_VARS") + monkeypatch.setenv("HAMILTON_INPUTS", "HAMILTON_INPUTS") + monkeypatch.setenv("HAMILTON_OVERRIDES", "HAMILTON_OVERRIDES") + + toml_path = Path(__file__).parent / "resources" / "test_context.toml" + + # Load context from 
TOML file + context = logic.load_context(toml_path) + + # Check that the expected values are loaded + assert context["HAMILTON_CONFIG"] == {"test_param": "test_value"} + assert context["HAMILTON_INPUTS"] == {"input_value": 42} + assert context["HAMILTON_OVERRIDES"] == {"override_value": "override"} + # The TOML file has an array of final variables + assert context["HAMILTON_FINAL_VARS"] == ["final_var1", "final_var2"] + + +def test_load_context_from_toml_tool_hamilton(monkeypatch): + """Test loading context from a TOML file with [tool.hamilton] section.""" + monkeypatch.setenv("HAMILTON_CONFIG", "HAMILTON_CONFIG") + monkeypatch.setenv("HAMILTON_FINAL_VARS", "HAMILTON_FINAL_VARS") + monkeypatch.setenv("HAMILTON_INPUTS", "HAMILTON_INPUTS") + monkeypatch.setenv("HAMILTON_OVERRIDES", "HAMILTON_OVERRIDES") + + toml_path = Path(__file__).parent / "resources" / "test_tool_hamilton.toml" + + # Load context from TOML file with tool.hamilton section + context = logic.load_context(toml_path) + + # Check that the expected values from [tool.hamilton] section are loaded + assert context["HAMILTON_CONFIG"] == {"test_param": "test_value"} + assert context["HAMILTON_INPUTS"] == {"input_value": 42, "string_input": "test_string"} + assert context["HAMILTON_OVERRIDES"] == {"override_value": "override"} + assert context["HAMILTON_FINAL_VARS"] == ["final_var1", "final_var2"] diff --git a/tests/function_modifiers/test_metadata.py b/tests/function_modifiers/test_metadata.py index d27e96e2d..3b6cb4189 100644 --- a/tests/function_modifiers/test_metadata.py +++ b/tests/function_modifiers/test_metadata.py @@ -41,7 +41,7 @@ def dummy_tagged_function() -> int: "foo@", # Invalid identifier "foo bar", # No spaces "foo.bar+baz", # Invalid key, not a valid identifier - "" "...", # Empty not allowed # Empty elements not allowed + "...", # Empty not allowed # Empty elements not allowed ], ) def test_tags_invalid_key(key): diff --git a/tests/test_base.py b/tests/test_base.py index 663892d4c..1e8dd4e43 
100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -345,7 +345,7 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected ({"Index:::int64": ["a"]}, {}, {}), marks=pytest.mark.skipif( PD_VERSION < version.parse("2.0.0"), - reason="Pandas 2.0 changed default indices but we still " "support pandas <2.0", + reason="Pandas 2.0 changed default indices but we still support pandas <2.0", ), ), pytest.param( @@ -353,7 +353,7 @@ def test_PandasDataFrameResult_build_dataframe_with_dataframes(outputs, expected ({"Int64Index:::int64": ["a"]}, {}, {}), marks=pytest.mark.skipif( PD_VERSION >= version.parse("2.0.0"), - reason="Pandas 2.0 changed default indices but we still " "support pandas <2.0", + reason="Pandas 2.0 changed default indices but we still support pandas <2.0", ), ), ], diff --git a/tests/test_type_utils.py b/tests/test_type_utils.py index a49e47e1a..edaf608bb 100644 --- a/tests/test_type_utils.py +++ b/tests/test_type_utils.py @@ -312,7 +312,7 @@ def test_check_input_type_match(node_type, input_value): # included if the @pytest.mark.skipif( sys.version_info < (3, 9, 0), - reason="Type hinting generics in standard collections " "is only supported in 3.9+", + reason="Type hinting generics in standard collections is only supported in 3.9+", ) def test_check_input_types_subscripted_generics_dict_str_Any(): """Tests check_input_type of SimplePythonDataFrameGraphAdapter""" @@ -324,7 +324,7 @@ def test_check_input_types_subscripted_generics_dict_str_Any(): # included if the @pytest.mark.skipif( sys.version_info < (3, 9, 0), - reason="Type hinting generics in standard collections " "is only supported in 3.9+", + reason="Type hinting generics in standard collections is only supported in 3.9+", ) def test_check_input_types_subscripted_generics_list_Any(): """Tests check_input_type of SimplePythonDataFrameGraphAdapter""" diff --git a/ui/backend/hamilton_ui b/ui/backend/hamilton_ui index 13cd1fa7f..254defddb 120000 --- 
a/ui/backend/hamilton_ui +++ b/ui/backend/hamilton_ui @@ -1 +1 @@ -server \ No newline at end of file +server diff --git a/ui/backend/server/trackingserver_base/auth/sync.py b/ui/backend/server/trackingserver_base/auth/sync.py index 85bc52305..bc808d9ad 100644 --- a/ui/backend/server/trackingserver_base/auth/sync.py +++ b/ui/backend/server/trackingserver_base/auth/sync.py @@ -95,7 +95,7 @@ async def ensure_user_exists(auth_provider_user_id: str, user_email: str) -> Use user_model = await User.objects.aget(auth_provider_user_id=auth_provider_user_id) except User.DoesNotExist: logger.warning( - f"Creating new user {user_email} with auth provider ID " f"id {auth_provider_user_id}" + f"Creating new user {user_email} with auth provider ID id {auth_provider_user_id}" ) user_model = User( auth_provider_user_id=auth_provider_user_id, diff --git a/ui/sdk/tests/tracking/test_runs.py b/ui/sdk/tests/tracking/test_runs.py index 7feee0b44..b6c2769c6 100644 --- a/ui/sdk/tests/tracking/test_runs.py +++ b/ui/sdk/tests/tracking/test_runs.py @@ -319,8 +319,8 @@ def create_node(name: str, type_: type) -> node.Node: create_node("test", np.ndarray), "unsupported", { - "action": "reach out to the DAGWorks team to add " "support for this type.", - "unsupported_type": " with " "dimensions (2, 2, 1)", + "action": "reach out to the DAGWorks team to add support for this type.", + "unsupported_type": " with dimensions (2, 2, 1)", }, ), (