Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/benchmarking_of_stop_detection_algorithms.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
"city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
"outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
"filepath_root = data_dir / \"gc_data_long\"\n",
"tc = {\n",
" \"user_id\": \"gc_identifier\",\n",
" \"timestamp\": \"unix_ts\",\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/benchmarking_of_stop_detection_algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
outer_box = box(*city.total_bounds).buffer(15, join_style='mitre')

filepath_root = 'gc_data_long/'
filepath_root = data_dir / "gc_data_long"
tc = {
"user_id": "gc_identifier",
"timestamp": "unix_ts",
Expand Down
2 changes: 1 addition & 1 deletion examples/dbstop_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
"city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
"outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
"filepath_root = data_dir / \"gc_data_long\"\n",
"tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"# Density based stop detection (Temporal DBSCAN)\n",
Expand Down
192 changes: 13 additions & 179 deletions examples/generate_synthetic_trajectories.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,243 +2,77 @@
"cells": [
{
"cell_type": "markdown",
"id": "10e2517a",
"metadata": {},
"source": [
"# Synthetic Trajectory Generation with Nomad\n",
"\n",
"This notebook demonstrates how to generate realistic synthetic human mobility trajectories."
]
"source": "# Synthetic Trajectory Generation with Nomad\n\nThis notebook demonstrates how to generate realistic synthetic human mobility trajectories."
},
{
"cell_type": "code",
"execution_count": null,
"id": "58d68e64",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import time\n",
"from pathlib import Path\n",
"from joblib import Parallel, delayed\n",
"\n",
"from nomad.city_gen import City\n",
"from nomad.traj_gen import Agent, Population\n",
"from nomad.stop_detection.viz import plot_pings, plot_time_barcode"
]
"source": "import pandas as pd\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport time\nfrom pathlib import Path\nfrom joblib import Parallel, delayed\n\nimport nomad.data as data_folder\nfrom nomad.city_gen import City\nfrom nomad.traj_gen import Agent, Population\nfrom nomad.stop_detection.viz import plot_pings, plot_time_barcode"
},
{
"cell_type": "code",
"execution_count": null,
"id": "9860e901",
"metadata": {},
"outputs": [],
"source": [
"city = City.from_geopackage('garden-city.gpkg')\n",
"city._build_hub_network(hub_size=16)\n",
"city.compute_gravity(exponent=2.0)\n",
"city.compute_shortest_paths(callable_only=True)\n",
"\n",
"print(f\"City: {city.name}\")\n",
"print(f\"Dimensions: {city.dimensions}\")\n",
"print(f\"Buildings: {len(city.buildings_gdf)}\")"
]
"source": "data_dir = Path(data_folder.__file__).parent\ncity = City.from_geopackage(data_dir / \"garden-city.gpkg\")\ncity._build_hub_network(hub_size=16)\ncity.compute_gravity(exponent=2.0)\ncity.compute_shortest_paths(callable_only=True)\n\nprint(f\"City: {city.name}\")\nprint(f\"Dimensions: {city.dimensions}\")\nprint(f\"Buildings: {len(city.buildings_gdf)}\")"
},
{
"cell_type": "markdown",
"id": "9ff8ddec",
"metadata": {},
"source": [
"## Part 1: Effect of Sampling Parameters on Sparsity\n",
"\n",
"Generate 3 agents with 2-day trajectories, varying beta_duration and beta_start \n",
"to show their effect on sparsity (q = observed points / ground truth points)."
]
"source": "## Part 1: Effect of Sampling Parameters on Sparsity\n\nGenerate 3 agents with 2-day trajectories, varying beta_duration and beta_start \nto show their effect on sparsity (q = observed points / ground truth points)."
},
{
"cell_type": "code",
"execution_count": null,
"id": "b6336cc1",
"metadata": {},
"outputs": [],
"source": [
"population = Population(city)\n",
"population.generate_agents(N=3, seed=42, name_count=2)\n",
"\n",
"# Vary beta_duration and beta_start to target different sparsity levels\n",
"sampling_params = [\n",
" {'beta_ping': 5, 'beta_start': 100, 'beta_durations': 60}, \n",
" {'beta_ping': 5, 'beta_start': 250, 'beta_durations': 150}, \n",
" {'beta_ping': 5, 'beta_start': 400, 'beta_durations': 240} \n",
"]\n",
"\n",
"# Generate 2-day trajectories for quick visualization\n",
"for i, (agent_id, agent) in enumerate(population.roster.items()):\n",
" agent.generate_trajectory(\n",
" datetime=pd.Timestamp(\"2024-01-01T07:00-04:00\"),\n",
" end_time=pd.Timestamp(\"2024-01-03T07:00-04:00\"),\n",
" seed=i\n",
" )\n",
"\n",
" agent.sample_trajectory(\n",
" **sampling_params[i],\n",
" replace_sparse_traj=True,\n",
" seed=i\n",
" )\n",
" \n",
" q = len(agent.sparse_traj) / len(agent.trajectory)\n",
" print(f\"Agent {i}: q={q:.3f}, beta_start={sampling_params[i]['beta_start']}, \"\n",
" f\"beta_dur={sampling_params[i]['beta_durations']}\")"
]
"source": "population = Population(city)\npopulation.generate_agents(N=3, seed=42, name_count=2)\n\n# Vary beta_duration and beta_start to target different sparsity levels\nsampling_params = [\n {'beta_ping': 5, 'beta_start': 100, 'beta_durations': 60}, \n {'beta_ping': 5, 'beta_start': 250, 'beta_durations': 150}, \n {'beta_ping': 5, 'beta_start': 400, 'beta_durations': 240} \n]\n\n# Generate 2-day trajectories for quick visualization\nfor i, (agent_id, agent) in enumerate(population.roster.items()):\n agent.generate_trajectory(\n datetime=pd.Timestamp(\"2024-01-01T07:00-04:00\"),\n end_time=pd.Timestamp(\"2024-01-03T07:00-04:00\"),\n seed=i\n )\n\n agent.sample_trajectory(\n **sampling_params[i],\n replace_sparse_traj=True,\n seed=i\n )\n \n q = len(agent.sparse_traj) / len(agent.trajectory)\n print(f\"Agent {i}: q={q:.3f}, beta_start={sampling_params[i]['beta_start']}, \"\n f\"beta_dur={sampling_params[i]['beta_durations']}\")"
},
{
"cell_type": "code",
"execution_count": null,
"id": "0e4b20fa",
"metadata": {
"lines_to_next_cell": 1
},
"metadata": {},
"outputs": [],
"source": [
"fig, axes = plt.subplots(2, 3, figsize=(15, 10), \n",
" gridspec_kw={'height_ratios': [10, 1]})\n",
"\n",
"for i, (agent_id, agent) in enumerate(population.roster.items()):\n",
" ax_map = axes[0, i]\n",
" ax_barcode = axes[1, i]\n",
" \n",
" city.plot_city(ax=ax_map, doors=False, address=False)\n",
" \n",
" traj = agent.sparse_traj\n",
" plot_pings(traj, ax=ax_map, s=15, point_color='red', \n",
" x='x', y='y', timestamp='timestamp')\n",
" \n",
" plot_time_barcode(traj['timestamp'], ax=ax_barcode, set_xlim=True)\n",
" \n",
" q = len(traj) / len(agent.trajectory)\n",
" ax_map.set_title(f\"Agent {i}: {len(traj)} obs (q={q:.2f})\\n\"\n",
" f\"beta_start={sampling_params[i]['beta_start']}, \"\n",
" f\"beta_dur={sampling_params[i]['beta_durations']}\")\n",
" ax_map.set_axis_off()\n",
"\n",
"plt.tight_layout()\n",
"plt.savefig('data/trajectories_visualization.png', dpi=150, bbox_inches='tight')\n",
"plt.show()"
]
"source": "fig, axes = plt.subplots(2, 3, figsize=(15, 10), \n gridspec_kw={'height_ratios': [10, 1]})\n\nfor i, (agent_id, agent) in enumerate(population.roster.items()):\n ax_map = axes[0, i]\n ax_barcode = axes[1, i]\n \n city.plot_city(ax=ax_map, doors=False, address=False)\n \n traj = agent.sparse_traj\n plot_pings(traj, ax=ax_map, s=15, point_color='red', \n x='x', y='y', timestamp='timestamp')\n \n plot_time_barcode(traj['timestamp'], ax=ax_barcode, set_xlim=True)\n \n q = len(traj) / len(agent.trajectory)\n ax_map.set_title(f\"Agent {i}: {len(traj)} obs (q={q:.2f})\\n\"\n f\"beta_start={sampling_params[i]['beta_start']}, \"\n f\"beta_dur={sampling_params[i]['beta_durations']}\")\n ax_map.set_axis_off()\n\nplt.tight_layout()\nplt.savefig('data/trajectories_visualization.png', dpi=150, bbox_inches='tight')\nplt.show()"
},
{
"cell_type": "markdown",
"id": "dc7b266a",
"metadata": {},
"source": [
"## Part 2: Parallel Generation at Scale\n",
"\n",
"Generate trajectories for 15 users using parallelization."
]
"source": "## Part 2: Parallel Generation at Scale\n\nGenerate trajectories for 15 users using parallelization."
},
{
"cell_type": "code",
"execution_count": null,
"id": "5238c745",
"metadata": {
"lines_to_next_cell": 1
},
"metadata": {},
"outputs": [],
"source": [
"def generate_agent_trajectory(args):\n",
" \"\"\"Worker function for parallel generation.\"\"\"\n",
" identifier, home, work, seed = args\n",
" city = City.from_geopackage('garden-city.gpkg')\n",
" city._build_hub_network(hub_size=16)\n",
" city.compute_gravity(exponent=2.0)\n",
" city.compute_shortest_paths(callable_only=True)\n",
" agent = Agent(identifier=identifier, city=city, home=home, workplace=work)\n",
" \n",
" agent.generate_trajectory(\n",
" datetime=pd.Timestamp(\"2024-01-01T07:00-04:00\"),\n",
" end_time=pd.Timestamp(\"2024-01-08T07:00-04:00\"),\n",
" seed=seed\n",
" )\n",
" agent.sample_trajectory(\n",
" beta_ping=5,\n",
" replace_sparse_traj=True,\n",
" seed=seed\n",
" )\n",
" sparse_df = agent.sparse_traj.copy()\n",
" sparse_df['user_id'] = identifier\n",
" sparse_df['home'] = home\n",
" sparse_df['workplace'] = work\n",
" return sparse_df"
]
"source": "def generate_agent_trajectory(args):\n \"\"\"Worker function for parallel generation.\"\"\"\n identifier, home, work, seed = args\n data_dir = Path(data_folder.__file__).parent\n city = City.from_geopackage(data_dir / \"garden-city.gpkg\")\n city._build_hub_network(hub_size=16)\n city.compute_gravity(exponent=2.0)\n city.compute_shortest_paths(callable_only=True)\n agent = Agent(identifier=identifier, city=city, home=home, workplace=work)\n \n agent.generate_trajectory(\n datetime=pd.Timestamp(\"2024-01-01T07:00-04:00\"),\n end_time=pd.Timestamp(\"2024-01-08T07:00-04:00\"),\n seed=seed\n )\n agent.sample_trajectory(\n beta_ping=5,\n replace_sparse_traj=True,\n seed=seed\n )\n sparse_df = agent.sparse_traj.copy()\n sparse_df['user_id'] = identifier\n sparse_df['home'] = home\n sparse_df['workplace'] = work\n return sparse_df"
},
{
"cell_type": "code",
"execution_count": null,
"id": "aa1b6112",
"metadata": {},
"outputs": [],
"source": [
"n_agents = 15\n",
"rng = np.random.default_rng(100)\n",
"homes = city.buildings_gdf[city.buildings_gdf['building_type'] == 'home']['id'].to_numpy()\n",
"workplaces = city.buildings_gdf[city.buildings_gdf['building_type'] == 'workplace']['id'].to_numpy()\n",
"\n",
"agent_params = [\n",
" (f'agent_{i:04d}',\n",
" rng.choice(homes),\n",
" rng.choice(workplaces),\n",
" i)\n",
" for i in range(n_agents)\n",
"]"
]
"source": "n_agents = 15\nrng = np.random.default_rng(100)\nhomes = city.buildings_gdf[city.buildings_gdf['building_type'] == 'home']['id'].to_numpy()\nworkplaces = city.buildings_gdf[city.buildings_gdf['building_type'] == 'workplace']['id'].to_numpy()\n\nagent_params = [\n (f'agent_{i:04d}',\n rng.choice(homes),\n rng.choice(workplaces),\n i)\n for i in range(n_agents)\n]"
},
{
"cell_type": "code",
"execution_count": null,
"id": "a0364bc9",
"metadata": {},
"outputs": [],
"source": [
"print(f\"Generating {n_agents} agents in parallel...\")\n",
"start_time = time.time()\n",
"\n",
"results = Parallel(n_jobs=-1, verbose=10)(\n",
" delayed(generate_agent_trajectory)(params) for params in agent_params\n",
")\n",
"\n",
"generation_time = time.time() - start_time\n",
"print(f\"Generated {n_agents} agents in {generation_time:.2f}s ({generation_time/n_agents:.2f}s per agent)\")"
]
"source": "print(f\"Generating {n_agents} agents in parallel...\")\nstart_time = time.time()\n\nresults = Parallel(n_jobs=-1, verbose=10)(\n delayed(generate_agent_trajectory)(params) for params in agent_params\n)\n\ngeneration_time = time.time() - start_time\nprint(f\"Generated {n_agents} agents in {generation_time:.2f}s ({generation_time/n_agents:.2f}s per agent)\")"
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8a48448",
"metadata": {},
"outputs": [],
"source": [
"parallel_population = Population(city)\n",
"for df, params in zip(results, agent_params):\n",
" identifier, home, work, seed = params\n",
" agent = Agent(identifier=identifier, city=city, home=home, workplace=work, seed=seed)\n",
" agent.sparse_traj = df.drop(columns=['home', 'workplace'])\n",
" parallel_population.add_agent(agent, verbose=False)\n",
"\n",
"output_path = 'data/trajectories_15_users'\n",
"parallel_population.save_pop(\n",
" sparse_path=str(output_path),\n",
" fmt='parquet'\n",
")\n",
"print(f\"Saved sparse trajectories to {output_path}\")"
]
"source": "parallel_population = Population(city)\nfor df, params in zip(results, agent_params):\n identifier, home, work, seed = params\n agent = Agent(identifier=identifier, city=city, home=home, workplace=work, seed=seed)\n agent.sparse_traj = df.drop(columns=['home', 'workplace'])\n parallel_population.add_agent(agent, verbose=False)\n\nparallel_population.reproject_to_mercator(sparse_traj=True)\n\noutput_path = 'data/trajectories_15_users'\nparallel_population.save_pop(\n sparse_path=str(output_path),\n fmt='parquet'\n)\nprint(f\"Saved sparse trajectories to {output_path}\")"
}
],
"metadata": {
"jupytext": {
"formats": "ipynb,py:percent"
},
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
Expand Down
9 changes: 7 additions & 2 deletions examples/generate_synthetic_trajectories.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,14 @@
from pathlib import Path
from joblib import Parallel, delayed

import nomad.data as data_folder
from nomad.city_gen import City
from nomad.traj_gen import Agent, Population
from nomad.stop_detection.viz import plot_pings, plot_time_barcode

# %%
city = City.from_geopackage('garden-city.gpkg')
data_dir = Path(data_folder.__file__).parent
city = City.from_geopackage(data_dir / "garden-city.gpkg")
city._build_hub_network(hub_size=16)
city.compute_gravity(exponent=2.0)
city.compute_shortest_paths(callable_only=True)
Expand Down Expand Up @@ -110,7 +112,8 @@
def generate_agent_trajectory(args):
"""Worker function for parallel generation."""
identifier, home, work, seed = args
city = City.from_geopackage('garden-city.gpkg')
data_dir = Path(data_folder.__file__).parent
city = City.from_geopackage(data_dir / "garden-city.gpkg")
city._build_hub_network(hub_size=16)
city.compute_gravity(exponent=2.0)
city.compute_shortest_paths(callable_only=True)
Expand Down Expand Up @@ -165,6 +168,8 @@ def generate_agent_trajectory(args):
agent.sparse_traj = df.drop(columns=['home', 'workplace'])
parallel_population.add_agent(agent, verbose=False)

parallel_population.reproject_to_mercator(sparse_traj=True)

output_path = 'data/trajectories_15_users'
parallel_population.save_pop(
sparse_path=str(output_path),
Expand Down
2 changes: 1 addition & 1 deletion examples/grid_based_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
"city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
"outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
"filepath_root = data_dir / \"gc_data_long\"\n",
"tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/grid_based_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
outer_box = box(*city.total_bounds)

filepath_root = 'gc_data_long/'
filepath_root = data_dir / "gc_data_long"
tc = {"user_id": "gc_identifier", "x": "dev_x", "y": "dev_y", "timestamp": "unix_ts"}

users = ['admiring_brattain']
Expand Down
2 changes: 1 addition & 1 deletion examples/hdbscan_demo.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
"city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')\n",
"outer_box = box(*city.total_bounds)\n",
"\n",
"filepath_root = 'gc_data_long/'\n",
"filepath_root = data_dir / \"gc_data_long\"\n",
"tc = {\"user_id\": \"gc_identifier\", \"x\": \"dev_x\", \"y\": \"dev_y\", \"timestamp\": \"unix_ts\"}\n",
"\n",
"users = ['admiring_brattain']\n",
Expand Down
2 changes: 1 addition & 1 deletion examples/hdbscan_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
city = gpd.read_parquet(data_dir / 'garden-city-buildings-mercator.parquet')
outer_box = box(*city.total_bounds)

filepath_root = 'gc_data_long/'
filepath_root = data_dir / "gc_data_long"
tc = {"user_id": "gc_identifier", "x": "dev_x", "y": "dev_y", "timestamp": "unix_ts"}

users = ['admiring_brattain']
Expand Down
Binary file removed examples/lachesis_colored_pings.png
Binary file not shown.
Loading