@@ -520,7 +520,7 @@ def write_repro(
520520 code_str = f"{ code_str } \n { main_code .format (graph_name = self .graph_name )} \n { comment_str } "
521521
522522 if file_name is None :
523- file_name = f"{ self .graph_name } .py"
523+ file_name = f"{ self .graph_name } _ { compile_fn . name } _repro .py"
524524 with open (folder / file_name , "w" ) as f :
525525 print (code_str , file = f )
526526 format_python_file (folder / file_name )
@@ -633,7 +633,7 @@ def write_benchmark(
633633
634634 code_str = f"{ code_str } \n { main_code .format (graph_name = self .graph_name )} \n { comment_str } "
635635 if file_name is None :
636- file_name = f"{ self .graph_name } .py"
636+ file_name = f"{ self .graph_name } _ { compile_fn . name } _ { time_fn . name } _benchmark .py"
637637 with open (folder / file_name , "w" ) as f :
638638 print (code_str , file = f )
639639 format_python_file (folder / file_name )
@@ -924,7 +924,7 @@ def write_nvfuser_benchmark(self, folder, time_fn: TimerInterface, file_name=Non
924924{ comment_str }
925925"""
926926 if file_name is None :
927- file_name = f"{ self .name } _benchmark_nvfuser .py"
927+ file_name = f"{ self .name } _benchmark_nvfuser_ { time_fn . name } .py"
928928 with open (folder / file_name , "w" ) as f :
929929 print (code_str , file = f )
930930 format_python_file (folder / file_name )
@@ -983,7 +983,7 @@ def write_inductor_benchmark(self, folder: PathLike, time_fn: TimerInterface, fi
983983print(measurement)
984984"""
985985 if file_name is None :
986- file_name = f"{ self .name } _benchmark_inductor .py"
986+ file_name = f"{ self .name } _benchmark_inductor_ { time_fn . name } .py"
987987 with open (folder / file_name , "w" ) as f :
988988 f .write (code_str )
989989 format_python_file (folder / file_name )
@@ -1428,22 +1428,39 @@ def save_thunderfx_repros(
14281428 Saves reproduction scripts for ThunderFX subgraphs.
14291429
14301430 This function:
1431- 1. Creates a folder structure to organize the repros
1432- .
1433- └── graph0
1434- ├── fusion_reports
1435- │ ├── graph0_thunder_0_nvFusion0_forward_repro_nvfuser.py
1436- │ ├── graph0_thunder_0_nvFusion1_forward_repro_nvfuser.py
1437- │ ├── graph0_thunder_0_nvFusion2_backward_repro_nvfuser.py
1438- ├── graph0_thunder_0_bwd_trace.py
1439- ├── graph0_thunder_0_fwd_trace.py
1440- └── graph0_thunder_0.py
1431+ 1. Creates a folder structure to organize the repro or benchmark scripts:
1432+
1433+ If use_benchmark is True:
1434+ graph0/
1435+ ├── fusion_reports/
1436+ │ ├── graph0_thunder_0_nvFusion0_forward_benchmark_inductor_KernelTime.py
1437+ │ ├── graph0_thunder_0_nvFusion0_forward_benchmark_inductor_WallTimeWithMemoryUsage.py
1438+ │ ├── graph0_thunder_0_nvFusion0_forward_benchmark_nvfuser_KernelTime.py
1439+ │ └── graph0_thunder_0_nvFusion0_forward_benchmark_nvfuser_WallTimeWithMemoryUsage.py
1440+ ├── graph0_repro_torchcompile.py
1441+ ├── graph0_thunder_0_bwd_trace.py
1442+ ├── graph0_thunder_0_fwd_trace.py
1443+ ├── graph0_thunder_0_inductor_KernelTime_benchmark.py
1444+ ├── graph0_thunder_0_inductor_WallTimeWithMemoryUsage_benchmark.py
1445+ ├── graph0_thunder_0_thunder_KernelTime_benchmark.py
1446+ └── graph0_thunder_0_thunder_WallTimeWithMemoryUsage_benchmark.py
1447+
1448+ If use_benchmark is False:
1449+ graph0/
1450+ ├── fusion_reports/
1451+ │ ├── graph0_thunder_0_nvFusion0_forward_repro_inductor.py
1452+ │ └── graph0_thunder_0_nvFusion0_forward_repro_nvfuser.py
1453+ ├── graph0_repro_torchcompile.py
1454+ ├── graph0_thunder_0_fwd_trace.py
1455+ ├── graph0_thunder_0_bwd_trace.py
1456+ ├── graph0_thunder_0_inductor_repro.py
1457+ └── graph0_thunder_0_thunder_repro.py
14411458
14421459 2. For each Thunder FX graph and its subgraphs:
1443- - Checks runnability if requested
1444- - Saves benchmark or repro scripts
1445- - Saves trace information if requested
1446- - Saves nvFusion repros if requested
1460+ - Checks runnability if requested
1461+ - Saves benchmark or repro scripts
1462+ - Saves trace information if requested
1463+ - Saves nvFusion repros if requested
14471464
14481465 Args:
14491466 fn: The callable to analyze
@@ -1452,7 +1469,7 @@ def save_thunderfx_repros(
14521469 check_runnability: If True, checks if graphs can run with Thunder
14531470 save_fusion: If True, saves nvFusion repros
14541471 save_trace: If True, saves trace information
1455- stream: Stream to write output log informationto
1472+ stream: Stream to write output log information to
14561473 force_overwrite: If True, overwrites existing folder at folder_path
14571474 **compile_kwargs: Keyword arguments for Thunder and torch.compile
14581475
@@ -1472,6 +1489,7 @@ def inner_fn(*args, **kwargs):
14721489 for thunder_fxgraph_report in thunder_fxgraph_reports :
14731490 graph_folder = folder_path / thunder_fxgraph_report .graph_name
14741491 graph_folder .mkdir (exist_ok = True , parents = True )
1492+ thunder_fxgraph_report .write_inductor_repro (graph_folder )
14751493 for split_report in thunder_fxgraph_report .subgraph_reports :
14761494 if check_runnability or save_trace or save_fusion :
14771495 try :
@@ -1484,22 +1502,38 @@ def inner_fn(*args, **kwargs):
14841502 continue
14851503 else :
14861504 stream .write (f"Successfully ran the { split_report .graph_name } using Thunder\n " )
1505+
1506+ from torch ._inductor .compile_fx import graph_returns_tuple
1507+
1508+ # torch._inductor.compile requires the graph output to be a tuple; if it is not, symbolic tracing is necessary
1509+ skip_symbolic_trace = graph_returns_tuple (split_report .graph )
1510+ torchinductor = TorchInductorSpecification (skip_symbolic_trace = skip_symbolic_trace )
14871511 if use_benchmark :
1488- split_report .write_benchmark (graph_folder , thunderjit , WallTime )
1512+ split_report .write_benchmark (graph_folder , thunderjit , WallTimeWithMemoryUsage )
1513+ split_report .write_benchmark (graph_folder , thunderjit , KernelTime )
1514+
1515+ split_report .write_benchmark (graph_folder , torchinductor , WallTimeWithMemoryUsage )
1516+ split_report .write_benchmark (graph_folder , torchinductor , KernelTime )
14891517 else :
14901518 split_report .write_repro (graph_folder , thunderjit )
1519+ split_report .write_repro (graph_folder , torchinductor )
14911520 if save_trace :
14921521 with open (graph_folder / f"{ split_report .graph_name } _fwd_trace.py" , "w" ) as f :
14931522 f .write (str (split_report .fwd_trc ))
1494- with open (graph_folder / f"{ split_report .graph_name } _bwd_trace.py" , "w" ) as f :
1495- f .write (str (split_report .bwd_trc ))
1523+ if split_report .bwd_trc is not None :
1524+ with open (graph_folder / f"{ split_report .graph_name } _bwd_trace.py" , "w" ) as f :
1525+ f .write (str (split_report .bwd_trc ))
14961526 if save_fusion :
14971527 fusion_folder = graph_folder / "fusion_reports"
14981528 fusion_folder .mkdir (exist_ok = True , parents = True )
14991529 for fusion_report in split_report .fusion_reports :
15001530 if use_benchmark :
1501- fusion_report .write_nvfuser_benchmark (fusion_folder , WallTime )
1531+ fusion_report .write_nvfuser_benchmark (fusion_folder , WallTimeWithMemoryUsage )
1532+ fusion_report .write_inductor_benchmark (fusion_folder , WallTimeWithMemoryUsage )
1533+ fusion_report .write_nvfuser_benchmark (fusion_folder , KernelTime )
1534+ fusion_report .write_inductor_benchmark (fusion_folder , KernelTime )
15021535 else :
15031536 fusion_report .write_nvfuser_repro (fusion_folder )
1537+ fusion_report .write_inductor_repro (fusion_folder )
15041538
15051539 return inner_fn
0 commit comments