From 0781559030d3226221e264eb969a523a1736c8d1 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:09:37 +0000
Subject: [PATCH 01/42] Update automation.sh

Added compute_metrics.py to the pipeline and removed distribution.py (Bitcoin Tor nodes are now redistributed directly in parse.py)
---
 bitcoin/automation.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bitcoin/automation.sh b/bitcoin/automation.sh
index d010661..995e2f2 100644
--- a/bitcoin/automation.sh
+++ b/bitcoin/automation.sh
@@ -10,12 +10,12 @@ python3 cleanup_dead_nodes.py
 python3 collect_geodata.py
 #python3 collect_osdata.py # not in use
 python3 parse.py
-python3 distribution.py
 python3 plot.py
+python3 compute_metrics.py
 
 # The following 2 lines create a folder and move all png and csv files to it
 mkdir output/"$(date +%Y-%m-%d)"
-mv -t output/"$(date +%Y-%m-%d)" output/*.png output/*.csv
+mv -t output/"$(date +%Y-%m-%d)" output/{clients,countries,protocols,organizations,ip_type}_*.csv output/response_length.json 2>/dev/null || true
 
 sleep 7d # will repeat the whole process every X days
 

From 900c1c3108b8577cf1552335e4d23e58fed40cb1 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:17:31 +0000
Subject: [PATCH 02/42] Implement Tor node redistribution in parse.py instead
 of distribution.py

---
 bitcoin/parse.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/bitcoin/parse.py b/bitcoin/parse.py
index c008f90..7fa39c2 100644
--- a/bitcoin/parse.py
+++ b/bitcoin/parse.py
@@ -327,6 +327,46 @@ def version(reachable_nodes, mode):
             versions_df.to_csv(f'./output/{name.lower()}_{ledger}.csv', index_label = name)
 
 
+def redistribute_tor_nodes(name, ledger, df, mode):
+    """
+    Redistributes Tor node count proportionally across non-Tor rows and writes ./output/<name>_<ledger>_without_tor.csv.
+    :param name: lowercase version of the mode ('countries' or 'organizations') used in file naming.
+    :param ledger: the ledger name.
+    :param df: the dataframe in which the Tor nodes must be redistributed (read via the column named after today's date).
+    :param mode: the mode name (e.g., 'Countries', 'Organizations').
+    """
+    date = datetime.today().strftime('%Y-%m-%d')
+    tor_row = df[df[mode] == 'Tor']
+    if tor_row.empty:
+        logging.info(f"No Tor nodes found in {ledger}.")
+        return
+    number_of_tor_nodes = tor_row[date].values[0]  # extract the number of Tor nodes for the given date
+    number_of_total_nodes_without_tor = df[df[f'{mode}'] != 'Tor'][f'{date}'].sum()  # sum of node counts excluding the Tor row
+    if number_of_total_nodes_without_tor == 0:  # only Tor rows have nodes: there are no proportions to distribute by (would divide by zero)
+        logging.warning(f"No non-Tor nodes found in {ledger}; skipping Tor redistribution.")
+        return
+    df['Distribution'] = df.apply(lambda row: round((row[f'{date}'] / number_of_total_nodes_without_tor) * number_of_tor_nodes) if row[f'{mode}'] != 'Tor' else 0, axis=1)  # create a new column 'Distribution' that distributes the Tor nodes proportionally to non-Tor rows
+    df[date] = df[date] + df['Distribution']
+    df.loc[df[mode] != 'Tor', [mode, date]].to_csv(f'./output/{name}_{ledger}_without_tor.csv', index=False)  # drop the Tor row and save the updated counts to a new CSV
+
+
+def without_tor():
+    """
+    For each mode (Countries, Organizations), loads ./output/<mode>_bitcoin.csv and calls redistribute_tor_nodes on it.
+    """
+    ledger = 'bitcoin'
+    modes = ['Countries', 'Organizations']
+    for mode in modes:
+        logging.info(f'parse.py: Removing Tor from {ledger} {mode}')
+        name = mode.lower()
+        filename = pathlib.Path(f'./output/{name}_{ledger}.csv')
+        if not filename.is_file():
+            logging.warning(f"File not found: {filename}")
+            continue  # a missing file for one mode must not prevent processing the remaining modes
+        df = pd.read_csv(filename)
+        redistribute_tor_nodes(name, ledger, df, mode)
+
+
 def cluster_organizations(ledger):
     """
     Clusters organizations in CSV files.
@@ -389,6 +429,8 @@ def main():
             geography(reachable_nodes, ledger, mode)
         if 'Organizations' in MODES:
             cluster_organizations(ledger)
+        if 'bitcoin' in LEDGERS:
+            without_tor()
 
 if __name__ == '__main__':
     main()

From 2ce349c74ae9316a68f3e8684f7e35dea4435763 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:18:04 +0000
Subject: [PATCH 03/42] Delete bitcoin/distribution.py

Functions now implemented in parse.py
---
 bitcoin/distribution.py | 54 -----------------------------------------
 1 file changed, 54 deletions(-)
 delete mode 100644 bitcoin/distribution.py

diff --git a/bitcoin/distribution.py b/bitcoin/distribution.py
deleted file mode 100644
index 0504a24..0000000
--- a/bitcoin/distribution.py
+++ /dev/null
@@ -1,54 +0,0 @@
-# This script is only used for the Bitcoin ledger. If the Bitcoin ledger is not selected in the config.yaml file, this script does nothing.
-
-import network_decentralization.helper as hlp
-import logging
-import pandas as pd
-from pathlib import Path
-
-def redistribute_tor_nodes(name, ledger, df, mode):
-    """
-    Redistributes Tor node count proportionally across non-Tor rows.
-    :param name: lowercase version of the mode ('countries' or 'organizations') used in file naming.
-    :param ledger: the ledger name.
-    :param df: the dataframe in which the Tor nodes must be reditributed.
-    :param mode: Countries or Organizations.
-    """
-    tor_row = df[df[mode] == 'Tor']
-    if tor_row.empty:
-        logging.info(f"No Tor nodes found in {ledger}.")
-        return
-
-    number_of_tor_nodes = tor_row[date].values[0] # extract the number of Tor nodes for the given date
-    number_of_total_nodes_without_tor = df[df[f'{mode}'] != 'Tor'][f'{date}'].sum() # sum of node counts excluding the Tor row
-    number_of_total_nodes = number_of_total_nodes_without_tor + number_of_tor_nodes
-    df['Distribution'] = df.apply(lambda row: round((row[f'{date}'] / number_of_total_nodes_without_tor) * number_of_tor_nodes) if row[f'{mode}'] != 'Tor' else 0, axis=1) # create a new column 'Distribution' that distributes the Tor nodes proportionally to non-Tor rows
-    df[date] = df[date] + df['Distribution']
-    df_without_tor = df[df[f'{mode}'] != 'Tor'] # filter out the Tor row
-    df_without_tor[[mode, date]].to_csv(f'./output/{name}_{ledger}_without_tor.csv', index=False) # save the updated DataFrame to a new CSV
-
-
-def without_tor():
-    """
-    Loads a CSV file and calls the redistribute_tor_nodes function.
-    """
-    ledger = 'bitcoin'
-    for mode in MODES:
-        logging.info(f'distribution.py: Removing Tor from {ledger} {mode}')
-        name = mode.lower()
-        filename = Path(f'./output/{name}_{ledger}.csv')
-        if not filename.is_file():
-            logging.warning(f"File not found: {filename}")
-            return None
-        df = pd.read_csv(filename)
-        redistribute_tor_nodes(name, ledger, df, mode)
-
-LEDGERS = hlp.get_ledgers()   
-MODES = hlp.get_mode()
-date = hlp.get_date()
-
-def main():
-    if 'bitcoin' in LEDGERS:
-        without_tor()
-
-if __name__ == '__main__':
-    main()

From d981f6635bd0f7bf627e1da672e99b5a9badffe8 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:19:46 +0000
Subject: [PATCH 04/42] Add compute_metrics.py

---
 bitcoin/compute_metrics.py | 183 +++++++++++++++++++++++++++++++++++++
 1 file changed, 183 insertions(+)
 create mode 100644 bitcoin/compute_metrics.py

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
new file mode 100644
index 0000000..06ef2aa
--- /dev/null
+++ b/bitcoin/compute_metrics.py
@@ -0,0 +1,183 @@
+#!/usr/bin/env python3
+"""
+Script to compute network decentralization metrics from CSV files in the output directory.
+Processes both organization and country CSV files and outputs metrics in CSV format.
+"""
+
+import csv
+import pathlib
+import sys
+from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
+from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
+from network_decentralization.metrics.entropy import compute_entropy
+from network_decentralization.metrics.concentration_ratio import compute_concentration_ratio
+
+
+def read_csv_data(csv_path):
+    """
+    Read CSV file and extract date and distribution values.
+    CSV format: Header row is "EntityType,YYYY-MM-DD" followed by data rows "entity_name,count"
+    
+    :param csv_path: Path to the CSV file
+    :return: Tuple of (date, sorted_distribution_list)
+    """
+    with open(csv_path, 'r', encoding='utf-8') as f:
+        reader = csv.reader(f)
+        
+        # Read header
+        header = next(reader)
+        date = header[1]  # Extract date from header
+        
+        # Read data rows and extract counts
+        distribution = []
+        for row in reader:
+            if len(row) >= 2:
+                try:
+                    count = int(row[1])
+                    distribution.append(count)
+                except (ValueError, IndexError):
+                    continue
+        
+        # Sort in descending order for metric calculations
+        distribution.sort(reverse=True)
+        
+        return date, distribution
+
+
+def get_ledger_name(csv_path):
+    """
+    Extract ledger name from CSV filename.
+    Expected format: organizations_<ledger>.csv or countries_<ledger>.csv
+    
+    :param csv_path: Path to the CSV file
+    :return: Ledger name (e.g., 'bitcoin', 'bitcoin_cash')
+    """
+    filename = csv_path.stem  # Get filename without extension
+    parts = filename.split('_')
+    # Remove 'organizations' or 'countries' prefix
+    return '_'.join(parts[1:])
+
+
+def compute_metrics(distribution):
+    """
+    Compute all metrics for a given distribution.
+    
+    :param distribution: Sorted list of entity counts (descending order)
+    :return: Dictionary with all computed metrics
+    """
+    total = sum(distribution)
+    
+    if total == 0:
+        return {
+            'hhi': None,
+            'nakamoto': None,
+            'entropy': None,
+            'max_power_ratio': None
+        }
+    
+    metrics = {
+        'hhi': compute_hhi(distribution),
+        'nakamoto': compute_nakamoto_coefficient(distribution),
+        'entropy': compute_entropy(distribution, alpha=1),  # Shannon entropy
+        'max_power_ratio': max(distribution) / total if distribution else 0
+    }
+    
+    return metrics
+
+
+def process_csv_files(output_dir, file_pattern, is_country=False):
+    """
+    Process all CSV files matching a pattern and output metrics.
+    Appends results to existing files or creates new ones; inputs without any node counts are skipped.
+    For bitcoin, uses the _without_tor versions if they exist.
+
+    :param output_dir: Path to the output directory
+    :param file_pattern: Glob pattern for CSV files (e.g., 'organizations_*.csv')
+    :param is_country: Boolean to indicate if processing country files
+    """
+    csv_files = sorted(output_dir.glob(file_pattern))
+
+    for csv_path in csv_files:
+        # Skip _without_tor files in the glob - we'll handle them explicitly for bitcoin
+        if '_without_tor' in csv_path.name:
+            continue
+
+        try:
+            ledger = get_ledger_name(csv_path)
+
+            # For bitcoin, check if _without_tor version exists and use that instead
+            if ledger == 'bitcoin':
+                file_type = 'countries' if is_country else 'organizations'
+                without_tor_path = output_dir / f"{file_type}_bitcoin_without_tor.csv"
+                if without_tor_path.exists():
+                    csv_path = without_tor_path
+
+            date, distribution = read_csv_data(csv_path)
+            metrics = compute_metrics(distribution)
+            if any(value is None for value in metrics.values()):  # empty/all-zero input: formatting None below would raise after the output file was already opened
+                print(f"Skipping {csv_path.name}: no node counts found", file=sys.stderr)
+                continue
+            # Determine output filename
+            if is_country:
+                output_filename = f"output_countries_{ledger}.csv"
+                output_path = output_dir / output_filename
+                file_exists = output_path.exists()
+
+                # Write header and data (append if exists)
+                with open(output_path, 'a', newline='', encoding='utf-8') as f:
+                    writer = csv.writer(f)
+                    if not file_exists:
+                        writer.writerow(['ledger', 'date', 'clustering', 'entropy', 'hhi', 'nakamoto_coefficient', 'max_power_ratio'])
+                    writer.writerow([
+                        ledger,
+                        date,
+                        'False',
+                        f"{metrics['entropy']:.15g}",
+                        f"{metrics['hhi']:.16g}",
+                        metrics['nakamoto'],
+                        f"{metrics['max_power_ratio']:.16g}"
+                    ])
+                print(f"Appended to: {output_filename}", file=sys.stderr)
+            else:
+                output_filename = f"output_organizations_{ledger}.csv"
+                output_path = output_dir / output_filename
+                file_exists = output_path.exists()
+
+                # Write header and data (append if exists)
+                with open(output_path, 'a', newline='', encoding='utf-8') as f:
+                    writer = csv.writer(f)
+                    if not file_exists:
+                        writer.writerow(['ledger', 'date', 'clustering', 'hhi', 'nakamoto_coefficient', 'max_power_ratio'])
+                    writer.writerow([
+                        ledger,
+                        date,
+                        'False',
+                        f"{metrics['hhi']:.16g}",
+                        metrics['nakamoto'],
+                        f"{metrics['max_power_ratio']:.16g}"
+                    ])
+                print(f"Appended to: {output_filename}", file=sys.stderr)
+        except Exception as e:
+            print(f"Error processing {csv_path.name}: {e}", file=sys.stderr)
+
+
+def main():
+    """
+    Main entry point for the script.
+    Processes organization and country CSV files from the output directory.
+    """
+    output_dir = pathlib.Path(__file__).parent / 'output'
+    
+    if not output_dir.exists():
+        print(f"Error: Output directory not found at {output_dir}", file=sys.stderr)
+        sys.exit(1)
+    
+    # Process organization files
+    process_csv_files(output_dir, 'organizations_*.csv', is_country=False)
+    
+    # Process country files
+    process_csv_files(output_dir, 'countries_*.csv', is_country=True)
+
+
+if __name__ == '__main__':
+    main()

From 046ac9b91e500851cf599ec8e5752b2efa4c8200 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:21:23 +0000
Subject: [PATCH 05/42] Create concentration_ratio.py

---
 .../metrics/concentration_ratio.py                       | 9 +++++++++
 1 file changed, 9 insertions(+)
 create mode 100644 bitcoin/network_decentralization/metrics/concentration_ratio.py

diff --git a/bitcoin/network_decentralization/metrics/concentration_ratio.py b/bitcoin/network_decentralization/metrics/concentration_ratio.py
new file mode 100644
index 0000000..8893cb2
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/concentration_ratio.py
@@ -0,0 +1,9 @@
+def compute_concentration_ratio(block_distribution, topn):
+    """
+    Calculates the n-concentration ratio of a distribution of balances
+    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :param topn: the number of top block producers to consider
+    :returns: float that represents the ratio of blocks produced by the top n block producers (0 if there weren't any)
+    """
+    total_blocks = sum(block_distribution)
+    return sum(block_distribution[:topn]) / total_blocks if total_blocks else 0

From 33ab5ec8f911e3415fa069711d44aaf9fca4feec Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:21:50 +0000
Subject: [PATCH 06/42] Upload metrics files

---
 .../metrics/entropy.py                        | 48 +++++++++++++++++++
 .../metrics/herfindahl_hirschman_index.py     | 20 ++++++++
 .../metrics/nakamoto_coefficient.py           | 10 ++++
 .../metrics/tau_index.py                      | 18 +++++++
 .../metrics/total_entities.py                 |  7 +++
 5 files changed, 103 insertions(+)
 create mode 100644 bitcoin/network_decentralization/metrics/entropy.py
 create mode 100644 bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py
 create mode 100644 bitcoin/network_decentralization/metrics/nakamoto_coefficient.py
 create mode 100644 bitcoin/network_decentralization/metrics/tau_index.py
 create mode 100644 bitcoin/network_decentralization/metrics/total_entities.py

diff --git a/bitcoin/network_decentralization/metrics/entropy.py b/bitcoin/network_decentralization/metrics/entropy.py
new file mode 100644
index 0000000..56e1b84
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/entropy.py
@@ -0,0 +1,48 @@
+from math import log
+from network_decentralization.metrics.total_entities import compute_total_entities
+
+
+def compute_entropy(block_distribution, alpha):
+    """
+    Calculates the entropy of a distribution of blocks to entities
+    Pi is the relative frequency of each entity.
+    Renyi entropy: 1/(1-alpha) * log2 (sum (Pi**alpha))
+    Shannon entropy (alpha=1): −sum P(Si) log2 (Pi)
+    Min entropy (alpha=-1): -log max Pi
+    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :param alpha: the entropy parameter (depending on its value the corresponding entropy measure is used)
+    :returns: a float that represents the entropy of the data or None if the data is empty
+    """
+    all_blocks = sum(block_distribution)
+    if all_blocks == 0:
+        return None
+    if alpha == 1:
+        entropy = 0
+        for value in block_distribution:
+            rel_freq = value / all_blocks
+            if rel_freq > 0:
+                entropy -= rel_freq * log(rel_freq, 2)
+    else:
+        if alpha == -1:
+            entropy = - log(max(block_distribution)/all_blocks, 2)
+        else:
+            sum_freqs = 0
+            for entry in block_distribution:
+                sum_freqs += pow(entry/all_blocks, alpha)
+            entropy = log(sum_freqs, 2) / (1 - alpha)
+
+    return entropy
+
+
+def compute_max_entropy(num_entities, alpha):
+    return compute_entropy([1 for i in range(num_entities)], alpha)
+
+
+def compute_entropy_percentage(block_distribution, alpha):
+    if sum(block_distribution) == 0:
+        return None
+    try:
+        total_entities = compute_total_entities(block_distribution)
+        return compute_entropy(block_distribution, alpha) / compute_max_entropy(total_entities, alpha)
+    except ZeroDivisionError:
+        return 0
diff --git a/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py b/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py
new file mode 100644
index 0000000..de22b87
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py
@@ -0,0 +1,20 @@
+def compute_hhi(block_distribution):
+    """
+    Calculates the Herfindahl-Hirschman index of a distribution of blocks to entities
+    From investopedia: The HHI is calculated by squaring the market share of each firm competing in a market and then
+    summing the resulting numbers. It can range from close to 0 to 10,000, with lower values indicating a less
+    concentrated market. The U.S. Department of Justice considers a market with an HHI of less than 1,500 to be a
+    competitive marketplace, an HHI of 1,500 to 2,500 to be a moderately concentrated marketplace,
+    and an HHI of 2,500 or greater to be a highly concentrated marketplace.
+    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :return: float between 0 and 10,000 that represents the HHI of the given distribution or None if the data is empty
+    """
+    total_blocks = sum(block_distribution)
+    if total_blocks == 0:
+        return None
+
+    hhi = 0
+    for num_blocks in block_distribution:
+        hhi += pow(100 * num_blocks / total_blocks, 2)
+
+    return hhi
diff --git a/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py b/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py
new file mode 100644
index 0000000..6f38992
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py
@@ -0,0 +1,10 @@
+from network_decentralization.metrics.tau_index import compute_tau_index
+
+
+def compute_nakamoto_coefficient(block_distribution):
+    """
+    Calculates the Nakamoto coefficient of a distribution of blocks to entities
+    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :returns: int that represents the Nakamoto coefficient of the given distribution, or None if the data is empty
+    """
+    return compute_tau_index(block_distribution, 0.5)
diff --git a/bitcoin/network_decentralization/metrics/tau_index.py b/bitcoin/network_decentralization/metrics/tau_index.py
new file mode 100644
index 0000000..a87ae23
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/tau_index.py
@@ -0,0 +1,18 @@
+def compute_tau_index(block_distribution, threshold):
+    """
+    Calculates the tau-decentralization index of a distribution of blocks
+    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :param threshold: float, the parameter of the tau-decentralization index, i.e. the threshold for the power
+    ratio that is captured by the index (e.g. 0.66 for 66%)
+    :returns: int that corresponds to the tau index of the given distribution, or None if there were no blocks
+    """
+    total_blocks = sum(block_distribution)
+    if total_blocks == 0:
+        return None
+    tau_index, power_ratio_covered = 0, 0
+    for block_amount in block_distribution:
+        if power_ratio_covered >= threshold:
+            break
+        tau_index += 1
+        power_ratio_covered += block_amount / total_blocks
+    return tau_index
diff --git a/bitcoin/network_decentralization/metrics/total_entities.py b/bitcoin/network_decentralization/metrics/total_entities.py
new file mode 100644
index 0000000..3ebaabe
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/total_entities.py
@@ -0,0 +1,7 @@
+def compute_total_entities(block_distribution):
+    """
+    Computes the number of entities that have produced blocks in the given timeframe.
+    :param block_distribution: list of integers, each being the blocks that an entity has produced
+    :returns: an integer that represents the number of entities that have produced blocks
+    """
+    return len([v for v in block_distribution if v > 0])

From c780365cf2485e2881924bc0ea861345f2c86b04 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 3 Mar 2026 16:36:13 +0000
Subject: [PATCH 07/42] Update README.md

---
 bitcoin/README.md | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/bitcoin/README.md b/bitcoin/README.md
index 683d8ad..9ebe4a9 100644
--- a/bitcoin/README.md
+++ b/bitcoin/README.md
@@ -13,7 +13,8 @@ This component of the project analyses the decentralisation of Bitcoin, Bitcoin
 1. **Network Crawling:** `crawl.py` tries to discover all reachable nodes participating in the network. Based on the [Bitnodes](https://github.com/ayeowch/bitnodes.git) project.
 2. **Data Collection:** Scripts collect data about nodes like IP addresses and client versions.
 3. **Data Parsing:** `parse.py` formats raw logs into structured files.
-4. **Visualisation:** `plot.py` generates several graphs.
+4. **Metrics Computation:** `compute_metrics.py` calculates decentralisation metrics.
+5. **Visualisation:** `plot.py` generates several graphs.
 
 ---
 
@@ -27,12 +28,12 @@ This component of the project analyses the decentralisation of Bitcoin, Bitcoin
 - **`parse.py`**  
   Processes raw data (e.g., logs from crawling) into structured formats (JSON, CSV) for easier analysis and plotting.
 
+- **`compute_metrics.py`**  
+  Computes network decentralisation metrics (HHI, Nakamoto coefficient, entropy, max power ratio) from CSV files.
+
 - **`analyze.py`** (Not in use)  
   Analyses datasets to extract decentralisation metrics.
 
-- **`distribution.py`**  
-  Distributes Tor nodes among others proportionally, by country or organisation.
-
 - **`plot.py`**  
   Generates data visualisations.
 
@@ -92,6 +93,7 @@ The scripts generate:
 - Parsed node datasets (CSV, JSON)
 - Geolocation-enriched data
 - Plots and charts in PNG
+- Computed metrics in `output_organizations_*.csv` and `output_countries_*.csv` files
 
 ---
 
@@ -106,9 +108,9 @@ bitcoin/
 ├── collect_geodata.py
 ├── collect_osdata.py
 ├── crawl.py
-├── distribution.py
 ├── parse.py
 ├── plot.py
+├── compute_metrics.py
 │
 ├── config.yaml
 ├── requirements.txt

From 4820164f949d1661a0656eb5a779a7a7fe673edf Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 4 Mar 2026 16:40:48 +0000
Subject: [PATCH 08/42] Reorder metrics computation and visualisation sections
 in workflow

---
 bitcoin/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bitcoin/README.md b/bitcoin/README.md
index 9ebe4a9..f34e25f 100644
--- a/bitcoin/README.md
+++ b/bitcoin/README.md
@@ -13,8 +13,8 @@ This component of the project analyses the decentralisation of Bitcoin, Bitcoin
 1. **Network Crawling:** `crawl.py` tries to discover all reachable nodes participating in the network. Based on the [Bitnodes](https://github.com/ayeowch/bitnodes.git) project.
 2. **Data Collection:** Scripts collect data about nodes like IP addresses and client versions.
 3. **Data Parsing:** `parse.py` formats raw logs into structured files.
-4. **Metrics Computation:** `compute_metrics.py` calculates decentralisation metrics.
-5. **Visualisation:** `plot.py` generates several graphs.
+4. **Visualisation:** `plot.py` generates several graphs.
+5. **Metrics Computation:** `compute_metrics.py` calculates decentralisation metrics.
 
 ---
 

From c508167a7b1e4ea3a713d3be5c60fc6a19b6e776 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:13:25 +0000
Subject: [PATCH 09/42] Delete unused analyze.py script

---
 bitcoin/analyze.py | 60 ----------------------------------------------
 1 file changed, 60 deletions(-)
 delete mode 100644 bitcoin/analyze.py

diff --git a/bitcoin/analyze.py b/bitcoin/analyze.py
deleted file mode 100644
index c30c268..0000000
--- a/bitcoin/analyze.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import csv
-import network_decentralization.helper as hlp
-import networkx as nx
-import logging
-
-logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO)
-
-
-LEDGERS = ['bitcoin', 'bitcoin_cash', 'dogecoin', 'litecoin', 'zcash']
-LEDGERS = ['bitcoin_cash', 'dogecoin', 'litecoin', 'zcash']
-
-
-network_edge_dir = hlp.get_output_directory() / 'network_edges'
-
-for ledger in LEDGERS:
-    logging.info(f'Analyzing {ledger}')
-
-    output_dir = hlp.get_output_directory()
-    edges = []
-    nodes = set()
-    try:
-        with open(output_dir / 'network_edges' / f'{ledger}.csv') as f:
-            csv_reader = csv.reader(f)
-            next(csv_reader)
-            for source, dest in csv_reader:
-                nodes.add(source)
-                nodes.add(dest)
-                if source != dest:
-                    edges.append((source, dest))
-    except FileNotFoundError:
-        continue
-
-    G = nx.DiGraph()
-    G.add_edges_from(edges)
-
-    all_nodes = list(G.nodes())
-    logging.info(f'\t Nodes: {len(nodes):,} - Edges: {len(edges):,}')
-    logging.info(f'\t     Isolated nodes (no in/out edges): {len(nodes)-len(all_nodes):,}')
-
-    degrees = G.degree()
-    avg_degree = sum([i[1] for i in degrees]) / len(degrees)
-    logging.info(f'\t Average node degree: {avg_degree:,}')
-
-    is_strongly_connected = nx.is_strongly_connected(G)
-    logging.info(f'\t Is strongly connected: {is_strongly_connected}')
-
-    if is_strongly_connected:
-        diameter = nx.diameter(G)
-        logging.info(f'\t Diameter (largest component): {diameter:,}')
-    else:
-        diameter = {}
-        for node in all_nodes:
-            shortest_paths = nx.shortest_path(G, source=node)
-            longest_shortest_path = max(shortest_paths.items(), key=lambda x: len(x[1]))[1]
-            if longest_shortest_path == [node]:
-                diameter[node] = -1
-            else:
-                diameter[node] = len(longest_shortest_path)
-        logging.info(f'\t Diameter of known graph: {max(diameter.values())}')
-        logging.info(f'\t Nodes without outgoing edges: {len([i for i in diameter if diameter[i] == -1])}')

From 3f12547abc0584dd0e4bf1d390ef6d33be67cc66 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:14:57 +0000
Subject: [PATCH 10/42] Delete unused collect_osdata.py script

---
 bitcoin/collect_osdata.py | 30 ------------------------------
 1 file changed, 30 deletions(-)
 delete mode 100644 bitcoin/collect_osdata.py

diff --git a/bitcoin/collect_osdata.py b/bitcoin/collect_osdata.py
deleted file mode 100644
index 43e2cdb..0000000
--- a/bitcoin/collect_osdata.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from network_decentralization.collect import collect_osdata
-import network_decentralization.helper as hlp
-import time
-import logging
-
-logging.basicConfig(format='[%(asctime)s] %(message)s', datefmt='%Y/%m/%d %I:%M:%S %p', level=logging.INFO)
-
-
-def main():
-    ledgers = hlp.get_ledgers()
-    timings = {}
-    for ledger in ledgers:
-        start = time.time()
-        collect_osdata(ledger, time.strftime('%Y-%m-%d'))
-        total_time = time.time() - start
-        timings[ledger] = total_time
-
-    print(2*'----------------\n')
-    for ledger in hlp.get_ledgers():
-        total_time = timings[ledger]
-        days = int(total_time / 86400)
-        hours = int((total_time - days*86400) / 3600)
-        mins = int((total_time - hours*3600 - days*86400) / 60)
-        secs = int(total_time - mins*60 - hours*3600 - days*86400)
-        print(f'\tcollect_osdata.py: {ledger} total time: {hours:02} hours, {mins:02} mins, {secs:02} secs')
-    print(2*'----------------\n')
-
-
-if __name__ == '__main__':
-    main()

From 68d7b752f81ae6d2f36a0716f75ad7bcbe91c767 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 4 Mar 2026 17:16:21 +0000
Subject: [PATCH 11/42] Remove unused analyze.py and collect_osdata.py

Removed unused scripts from README and directory structure.
---
 bitcoin/README.md | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/bitcoin/README.md b/bitcoin/README.md
index f34e25f..18c6c10 100644
--- a/bitcoin/README.md
+++ b/bitcoin/README.md
@@ -31,21 +31,16 @@ This component of the project analyses the decentralisation of Bitcoin, Bitcoin
 - **`compute_metrics.py`**  
   Computes network decentralisation metrics (HHI, Nakamoto coefficient, entropy, max power ratio) from CSV files.
 
-- **`analyze.py`** (Not in use)  
-  Analyses datasets to extract decentralisation metrics.
-
 - **`plot.py`**  
   Generates data visualisations.
 
 - **`collect_geodata.py`**  
   Uses third-party APIs to enrich nodes with geolocation info (country, city, organisation).
 
-- **`collect_osdata.py`** (Not in use)  
-  Identifies the operating system running on nodes.
-
 - **`cleanup_dead_nodes.py`**  
   Scans stored node datasets to remove offline or unreachable nodes.
 
+
 ### Automation & Configuration
 
 - **`automation.sh`**  
@@ -102,11 +97,9 @@ The scripts generate:
 ```
 bitcoin/
 │
-├── analyze.py
 ├── automation.sh
 ├── cleanup_dead_nodes.py
 ├── collect_geodata.py
-├── collect_osdata.py
 ├── crawl.py
 ├── parse.py
 ├── plot.py

From 117b95a88edff519f9cecbbbe365604f224ccd79 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:50:29 +0000
Subject: [PATCH 12/42] Metrics now layer-agnostic

---
 .../metrics/concentration_ratio.py                 | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/bitcoin/network_decentralization/metrics/concentration_ratio.py b/bitcoin/network_decentralization/metrics/concentration_ratio.py
index 8893cb2..4c85cb5 100644
--- a/bitcoin/network_decentralization/metrics/concentration_ratio.py
+++ b/bitcoin/network_decentralization/metrics/concentration_ratio.py
@@ -1,9 +1,9 @@
-def compute_concentration_ratio(block_distribution, topn):
+def compute_concentration_ratio(distribution, topn):
     """
-    Calculates the n-concentration ratio of a distribution of balances
-    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
-    :param topn: the number of top block producers to consider
-    :returns: float that represents the ratio of blocks produced by the top n block producers (0 if there weren't any)
+    Calculates the n-concentration ratio of a distribution
+    :param distribution: list of non-negative counts per entity, sorted in descending order
+    :param topn: the number of top entities to consider
+    :returns: float that represents the ratio of total count held by the top n entities (0 if total is 0)
     """
-    total_blocks = sum(block_distribution)
-    return sum(block_distribution[:topn]) / total_blocks if total_blocks else 0
+    total = sum(distribution)
+    return sum(distribution[:topn]) / total if total else 0

From aa2dec56eae348ea0126ab9d11a856412e5b8dae Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:50:59 +0000
Subject: [PATCH 13/42] Metrics now layer-agnostic

---
 .../metrics/entropy.py                        | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/bitcoin/network_decentralization/metrics/entropy.py b/bitcoin/network_decentralization/metrics/entropy.py
index 56e1b84..e4352b7 100644
--- a/bitcoin/network_decentralization/metrics/entropy.py
+++ b/bitcoin/network_decentralization/metrics/entropy.py
@@ -2,33 +2,33 @@
 from network_decentralization.metrics.total_entities import compute_total_entities
 
 
-def compute_entropy(block_distribution, alpha):
+def compute_entropy(distribution, alpha):
     """
-    Calculates the entropy of a distribution of blocks to entities
+    Calculates the entropy of an entity distribution.
     Pi is the relative frequency of each entity.
     Renyi entropy: 1/(1-alpha) * log2 (sum (Pi**alpha))
     Shannon entropy (alpha=1): −sum P(Si) log2 (Pi)
     Min entropy (alpha=-1): -log max Pi
-    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :param distribution: list of non-negative counts per entity, sorted in descending order
     :param alpha: the entropy parameter (depending on its value the corresponding entropy measure is used)
     :returns: a float that represents the entropy of the data or None if the data is empty
     """
-    all_blocks = sum(block_distribution)
-    if all_blocks == 0:
+    total = sum(distribution)
+    if total == 0:
         return None
     if alpha == 1:
         entropy = 0
-        for value in block_distribution:
-            rel_freq = value / all_blocks
+        for value in distribution:
+            rel_freq = value / total
             if rel_freq > 0:
                 entropy -= rel_freq * log(rel_freq, 2)
     else:
         if alpha == -1:
-            entropy = - log(max(block_distribution)/all_blocks, 2)
+            entropy = -log(max(distribution) / total, 2)
         else:
             sum_freqs = 0
-            for entry in block_distribution:
-                sum_freqs += pow(entry/all_blocks, alpha)
+            for entry in distribution:
+                sum_freqs += pow(entry / total, alpha)
             entropy = log(sum_freqs, 2) / (1 - alpha)
 
     return entropy
@@ -38,11 +38,11 @@ def compute_max_entropy(num_entities, alpha):
     return compute_entropy([1 for i in range(num_entities)], alpha)
 
 
-def compute_entropy_percentage(block_distribution, alpha):
-    if sum(block_distribution) == 0:
+def compute_entropy_percentage(distribution, alpha):
+    if sum(distribution) == 0:
         return None
     try:
-        total_entities = compute_total_entities(block_distribution)
-        return compute_entropy(block_distribution, alpha) / compute_max_entropy(total_entities, alpha)
+        total_entities = compute_total_entities(distribution)
+        return compute_entropy(distribution, alpha) / compute_max_entropy(total_entities, alpha)
     except ZeroDivisionError:
         return 0

From 9e6e6807f9f374a8ff3851850c15dfc61bcd0e48 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:51:16 +0000
Subject: [PATCH 14/42] Metrics now layer-agnostic

---
 .../metrics/herfindahl_hirschman_index.py          | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py b/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py
index de22b87..fe6fc41 100644
--- a/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py
+++ b/bitcoin/network_decentralization/metrics/herfindahl_hirschman_index.py
@@ -1,20 +1,20 @@
-def compute_hhi(block_distribution):
+def compute_hhi(distribution):
     """
-    Calculates the Herfindahl-Hirschman index of a distribution of blocks to entities
+    Calculates the Herfindahl-Hirschman index of an entity distribution.
     From investopedia: The HHI is calculated by squaring the market share of each firm competing in a market and then
     summing the resulting numbers. It can range from close to 0 to 10,000, with lower values indicating a less
     concentrated market. The U.S. Department of Justice considers a market with an HHI of less than 1,500 to be a
     competitive marketplace, an HHI of 1,500 to 2,500 to be a moderately concentrated marketplace,
     and an HHI of 2,500 or greater to be a highly concentrated marketplace.
-    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    :param distribution: list of non-negative counts per entity, sorted in descending order
     :return: float between 0 and 10,000 that represents the HHI of the given distribution or None if the data is empty
     """
-    total_blocks = sum(block_distribution)
-    if total_blocks == 0:
+    total = sum(distribution)
+    if total == 0:
         return None
 
     hhi = 0
-    for num_blocks in block_distribution:
-        hhi += pow(100 * num_blocks / total_blocks, 2)
+    for count in distribution:
+        hhi += pow(100 * count / total, 2)
 
     return hhi

From b5bd2ecf76adfe325044b2112311449349fd181e Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:51:33 +0000
Subject: [PATCH 15/42] Metrics now layer-agnostic

---
 .../metrics/nakamoto_coefficient.py                       | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py b/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py
index 6f38992..e0bd938 100644
--- a/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py
+++ b/bitcoin/network_decentralization/metrics/nakamoto_coefficient.py
@@ -1,10 +1,10 @@
 from network_decentralization.metrics.tau_index import compute_tau_index
 
 
-def compute_nakamoto_coefficient(block_distribution):
+def compute_nakamoto_coefficient(distribution):
     """
-    Calculates the Nakamoto coefficient of a distribution of blocks to entities
-    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    Calculates the Nakamoto coefficient of an entity distribution.
+    :param distribution: list of non-negative counts per entity, sorted in descending order
     :returns: int that represents the Nakamoto coefficient of the given distribution, or None if the data is empty
     """
-    return compute_tau_index(block_distribution, 0.5)
+    return compute_tau_index(distribution, 0.5)

From d524a6d104f6e32455655d37f127bf7b5c034c70 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:51:53 +0000
Subject: [PATCH 16/42] Metrics now layer-agnostic

---
 .../metrics/tau_index.py                         | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/bitcoin/network_decentralization/metrics/tau_index.py b/bitcoin/network_decentralization/metrics/tau_index.py
index a87ae23..17edd5d 100644
--- a/bitcoin/network_decentralization/metrics/tau_index.py
+++ b/bitcoin/network_decentralization/metrics/tau_index.py
@@ -1,18 +1,18 @@
-def compute_tau_index(block_distribution, threshold):
+def compute_tau_index(distribution, threshold):
     """
-    Calculates the tau-decentralization index of a distribution of blocks
-    :param block_distribution: a list of integers, each being the blocks that an entity has produced, sorted in descending order
+    Calculates the tau-decentralization index of an entity distribution.
+    :param distribution: list of non-negative counts per entity, sorted in descending order
     :param threshold: float, the parameter of the tau-decentralization index, i.e. the threshold for the power
     ratio that is captured by the index (e.g. 0.66 for 66%)
-    :returns: int that corresponds to the tau index of the given distribution, or None if there were no blocks
+    :returns: int that corresponds to the tau index of the given distribution, or None if total is 0
     """
-    total_blocks = sum(block_distribution)
-    if total_blocks == 0:
+    total = sum(distribution)
+    if total == 0:
         return None
     tau_index, power_ratio_covered = 0, 0
-    for block_amount in block_distribution:
+    for amount in distribution:
         if power_ratio_covered >= threshold:
             break
         tau_index += 1
-        power_ratio_covered += block_amount / total_blocks
+        power_ratio_covered += amount / total
     return tau_index

From 32011d9ec3ab141daecd1eca03147ff340e47ded Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 6 Mar 2026 14:52:16 +0000
Subject: [PATCH 17/42] Metrics now layer-agnostic

---
 .../network_decentralization/metrics/total_entities.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/bitcoin/network_decentralization/metrics/total_entities.py b/bitcoin/network_decentralization/metrics/total_entities.py
index 3ebaabe..0a2e82b 100644
--- a/bitcoin/network_decentralization/metrics/total_entities.py
+++ b/bitcoin/network_decentralization/metrics/total_entities.py
@@ -1,7 +1,7 @@
-def compute_total_entities(block_distribution):
+def compute_total_entities(distribution):
     """
-    Computes the number of entities that have produced blocks in the given timeframe.
-    :param block_distribution: list of integers, each being the blocks that an entity has produced
-    :returns: an integer that represents the number of entities that have produced blocks
+    Computes the number of entities with a positive count in the given distribution.
+    :param distribution: list of non-negative counts per entity
+    :returns: number of entities with count > 0
     """
-    return len([v for v in block_distribution if v > 0])
+    return len([v for v in distribution if v > 0])

From bc1db11ef88858be3d22b1a445f9790b11a19ec9 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:03:16 +0000
Subject: [PATCH 18/42] Remove unused tau_index metric

---
 .../metrics/tau_index.py                       | 18 ------------------
 1 file changed, 18 deletions(-)
 delete mode 100644 bitcoin/network_decentralization/metrics/tau_index.py

diff --git a/bitcoin/network_decentralization/metrics/tau_index.py b/bitcoin/network_decentralization/metrics/tau_index.py
deleted file mode 100644
index 17edd5d..0000000
--- a/bitcoin/network_decentralization/metrics/tau_index.py
+++ /dev/null
@@ -1,18 +0,0 @@
-def compute_tau_index(distribution, threshold):
-    """
-    Calculates the tau-decentralization index of an entity distribution.
-    :param distribution: list of non-negative counts per entity, sorted in descending order
-    :param threshold: float, the parameter of the tau-decentralization index, i.e. the threshold for the power
-    ratio that is captured by the index (e.g. 0.66 for 66%)
-    :returns: int that corresponds to the tau index of the given distribution, or None if total is 0
-    """
-    total = sum(distribution)
-    if total == 0:
-        return None
-    tau_index, power_ratio_covered = 0, 0
-    for amount in distribution:
-        if power_ratio_covered >= threshold:
-            break
-        tau_index += 1
-        power_ratio_covered += amount / total
-    return tau_index

From 517fdf8d4f8b257b1073d537073f719fabd33e7f Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:03:27 +0000
Subject: [PATCH 19/42] Remove unused total_entities metric

---
 bitcoin/network_decentralization/metrics/total_entities.py | 7 -------
 1 file changed, 7 deletions(-)
 delete mode 100644 bitcoin/network_decentralization/metrics/total_entities.py

diff --git a/bitcoin/network_decentralization/metrics/total_entities.py b/bitcoin/network_decentralization/metrics/total_entities.py
deleted file mode 100644
index 0a2e82b..0000000
--- a/bitcoin/network_decentralization/metrics/total_entities.py
+++ /dev/null
@@ -1,7 +0,0 @@
-def compute_total_entities(distribution):
-    """
-    Computes the number of entities with a positive count in the given distribution.
-    :param distribution: list of non-negative counts per entity
-    :returns: number of entities with count > 0
-    """
-    return len([v for v in distribution if v > 0])

From f9d98bc59757e57d0dbe3d0f86a2f7d17ba34536 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:04:23 +0000
Subject: [PATCH 20/42] Add network and geo metrics to config.yaml

---
 bitcoin/config.yaml | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/bitcoin/config.yaml b/bitcoin/config.yaml
index fcf6110..42f8f52 100644
--- a/bitcoin/config.yaml
+++ b/bitcoin/config.yaml
@@ -23,3 +23,17 @@ last_time_active: 1
 # The first path will be used to write newly created dbs and the output of runs
 output_directories:  
   - ./output
+
+# Metrics for network analysis (organizations)
+network_metrics:
+  - HHI
+  - Nakamoto
+#  - Entropy
+  - Max Power Ratio
+
+# Metrics for geographic analysis (countries)
+geo_metrics:
+  - HHI
+  - Nakamoto
+  - Entropy
+  - Max Power Ratio

From 594013deb1aff4c3497541f860ac312298035ecd Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:06:58 +0000
Subject: [PATCH 21/42] Refactor script to accept metric names as parameters

---
 bitcoin/compute_metrics.py | 132 ++++++++++++++++++++-----------------
 1 file changed, 71 insertions(+), 61 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 06ef2aa..292c71f 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -7,6 +7,7 @@
 import csv
 import pathlib
 import sys
+from network_decentralization.helper import get_config_data, get_metrics_network, get_metrics_geo
 from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
 from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
 from network_decentralization.metrics.entropy import compute_entropy
@@ -58,34 +59,41 @@ def get_ledger_name(csv_path):
     return '_'.join(parts[1:])
 
 
-def compute_metrics(distribution):
+def compute_metrics(distribution, metric_names):
     """
-    Compute all metrics for a given distribution.
+    Compute specified metrics for a given distribution.
     
     :param distribution: Sorted list of entity counts (descending order)
-    :return: Dictionary with all computed metrics
+    :param metric_names: List of metric names to compute (e.g., ['HHI', 'Nakamoto', 'Entropy'])
+    :return: Dictionary with computed metric values
     """
     total = sum(distribution)
+    metrics = {}
+    
+    # Mapping of metric display names to computation functions
+    metric_map = {
+        'HHI': ('hhi', compute_hhi),
+        'Nakamoto': ('nakamoto', compute_nakamoto_coefficient),
+        'Entropy': ('entropy', lambda d: compute_entropy(d, alpha=1)),
+        'Max Power Ratio': ('max_power_ratio', lambda d: max(d) / total if d else 0)
+    }
     
     if total == 0:
-        return {
-            'hhi': None,
-            'nakamoto': None,
-            'entropy': None,
-            'max_power_ratio': None
-        }
+        return {metric_map[name][0]: None for name in metric_names if name in metric_map}
     
-    metrics = {
-        'hhi': compute_hhi(distribution),
-        'nakamoto': compute_nakamoto_coefficient(distribution),
-        'entropy': compute_entropy(distribution, alpha=1),  # Shannon entropy
-        'max_power_ratio': max(distribution) / total if distribution else 0
-    }
+    for metric_name in metric_names:
+        if metric_name in metric_map:
+            key, func = metric_map[metric_name]
+            try:
+                metrics[key] = func(distribution)
+            except Exception as e:
+                print(f"Error computing {metric_name}: {e}", file=sys.stderr)
+                metrics[key] = None
     
     return metrics
 
 
-def process_csv_files(output_dir, file_pattern, is_country=False):
+def process_csv_files(output_dir, file_pattern, is_country, metric_names):
     """
     Process all CSV files matching a pattern and output metrics.
     Appends results to existing files or creates new ones.
@@ -94,6 +102,7 @@ def process_csv_files(output_dir, file_pattern, is_country=False):
     :param output_dir: Path to the output directory
     :param file_pattern: Glob pattern for CSV files (e.g., 'organizations_*.csv')
     :param is_country: Boolean to indicate if processing country files
+    :param metric_names: List of metric names to compute and output
     """
     csv_files = sorted(output_dir.glob(file_pattern))
     
@@ -113,48 +122,46 @@ def process_csv_files(output_dir, file_pattern, is_country=False):
                     csv_path = without_tor_path
             
             date, distribution = read_csv_data(csv_path)
-            metrics = compute_metrics(distribution)
+            metrics = compute_metrics(distribution, metric_names)
+            
+            # Determine output filename and metric column mapping
+            file_type = 'countries' if is_country else 'organizations'
+            output_filename = f"output_{file_type}_{ledger}.csv"
+            output_path = output_dir / output_filename
+            file_exists = output_path.exists()
+            
+            # Map display names to internal keys for column ordering
+            metric_key_map = {
+                'HHI': 'hhi',
+                'Nakamoto': 'nakamoto',
+                'Entropy': 'entropy',
+                'Max Power Ratio': 'max_power_ratio'
+            }
             
-            # Determine output filename
-            if is_country:
-                output_filename = f"output_countries_{ledger}.csv"
-                output_path = output_dir / output_filename
-                file_exists = output_path.exists()
+            # Build header from metric names
+            header = ['ledger', 'date', 'clustering'] + metric_names
+            
+            # Write header and data (append if exists)
+            with open(output_path, 'a', newline='', encoding='utf-8') as f:
+                writer = csv.writer(f)
+                if not file_exists:
+                    writer.writerow(header)
                 
-                # Write header and data (append if exists)
-                with open(output_path, 'a', newline='', encoding='utf-8') as f:
-                    writer = csv.writer(f)
-                    if not file_exists:
-                        writer.writerow(['ledger', 'date', 'clustering', 'entropy', 'hhi', 'nakamoto_coefficient', 'max_power_ratio'])
-                    writer.writerow([
-                        ledger,
-                        date,
-                        'False',
-                        f"{metrics['entropy']:.15g}",
-                        f"{metrics['hhi']:.16g}",
-                        metrics['nakamoto'],
-                        f"{metrics['max_power_ratio']:.16g}"
-                    ])
-                print(f"Appended to: {output_filename}", file=sys.stderr)
-            else:
-                output_filename = f"output_organizations_{ledger}.csv"
-                output_path = output_dir / output_filename
-                file_exists = output_path.exists()
+                # Build row with metric values in the same order as header
+                row = [ledger, date, 'False']
+                for metric_display_name in metric_names:
+                    metric_key = metric_key_map.get(metric_display_name)
+                    value = metrics.get(metric_key) if metric_key else None
+                    if value is None:
+                        row.append('')
+                    elif isinstance(value, float):
+                        row.append(f"{value:.16g}")
+                    else:
+                        row.append(str(value))
                 
-                # Write header and data (append if exists)
-                with open(output_path, 'a', newline='', encoding='utf-8') as f:
-                    writer = csv.writer(f)
-                    if not file_exists:
-                        writer.writerow(['ledger', 'date', 'clustering', 'hhi', 'nakamoto_coefficient', 'max_power_ratio'])
-                    writer.writerow([
-                        ledger,
-                        date,
-                        'False',
-                        f"{metrics['hhi']:.16g}",
-                        metrics['nakamoto'],
-                        f"{metrics['max_power_ratio']:.16g}"
-                    ])
-                print(f"Appended to: {output_filename}", file=sys.stderr)
+                writer.writerow(row)
+            
+            print(f"Appended to: {output_filename}", file=sys.stderr)
             
         except Exception as e:
             print(f"Error processing {csv_path.name}: {e}", file=sys.stderr)
@@ -164,19 +171,22 @@ def process_csv_files(output_dir, file_pattern, is_country=False):
 def main():
     """
     Main entry point for the script.
-    Processes organization and country CSV files from the output directory.
+    Loads metric names from config and processes organization and country CSV files.
     """
+    # Load metric names from config using helper functions
+    network_metrics = get_metrics_network()
+    geo_metrics = get_metrics_geo()
+    
     output_dir = pathlib.Path(__file__).parent / 'output'
     
     if not output_dir.exists():
         print(f"Error: Output directory not found at {output_dir}", file=sys.stderr)
         sys.exit(1)
+    # Process organization files with network metrics
+    process_csv_files(output_dir, 'organizations_*.csv', is_country=False, metric_names=network_metrics)
     
-    # Process organization files
-    process_csv_files(output_dir, 'organizations_*.csv', is_country=False)
-    
-    # Process country files
-    process_csv_files(output_dir, 'countries_*.csv', is_country=True)
+    # Process country files with geo metrics
+    process_csv_files(output_dir, 'countries_*.csv', is_country=True, metric_names=geo_metrics)
 
 
 if __name__ == '__main__':

From 3fa89d8a1604000346acc31e221b674473d9a1d7 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:07:41 +0000
Subject: [PATCH 22/42] Add functions to retrieve network and geo metrics from
 config file

---
 bitcoin/network_decentralization/helper.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/bitcoin/network_decentralization/helper.py b/bitcoin/network_decentralization/helper.py
index 7a6651e..bd49263 100644
--- a/bitcoin/network_decentralization/helper.py
+++ b/bitcoin/network_decentralization/helper.py
@@ -64,6 +64,22 @@ def get_concurrency():
     return get_config_data()['execution_parameters']['concurrency']
 
 
+def get_metrics_network():
+    """
+    Retrieves the list of metrics to compute for network analysis (organizations)
+    :returns: a list of metric names to compute
+    """
+    return get_config_data().get('network_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Max Power Ratio'])
+
+
+def get_metrics_geo():
+    """
+    Retrieves the list of metrics to compute for geographic analysis (countries)
+    :returns: a list of metric names to compute
+    """
+    return get_config_data().get('geo_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Max Power Ratio'])
+
+
 def get_output_directory(ledger=None, dead=False):
     """
     Reads the config file and retrieves the output directory

From 639e404485024a726b0496511f4ba30ac08595ad Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:15:27 +0000
Subject: [PATCH 23/42] Add metrics directory and related Python files to README

---
 bitcoin/README.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/bitcoin/README.md b/bitcoin/README.md
index 18c6c10..a7a02e5 100644
--- a/bitcoin/README.md
+++ b/bitcoin/README.md
@@ -116,7 +116,12 @@ bitcoin/
 │   ├── collect.py
 │   ├── constants.py
 │   ├── helper.py
-│   └── protocol.py
+│   ├── protocol.py
+│   └── metrics/
+│       ├── concentration_ratio.py
+│       ├── entropy.py
+│       ├── herfindahl_hirschman_index.py
+│       └── nakamoto_coefficient.py
 │
 └── seed_info/
     ├── bitcoin.json

From 3a35644a14c65689eb452f82245adef56acbabc8 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 13:19:39 +0000
Subject: [PATCH 24/42] Refactor ledger name extraction and processing logic

---
 bitcoin/compute_metrics.py | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 292c71f..75b739c 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -54,6 +54,7 @@ def get_ledger_name(csv_path):
     :return: Ledger name (e.g., 'bitcoin', 'bitcoin_cash')
     """
     filename = csv_path.stem  # Get filename without extension
+    filename = filename.replace('_without_tor', '')  # Normalize bitcoin without_tor variant
     parts = filename.split('_')
     # Remove 'organizations' or 'countries' prefix
     return '_'.join(parts[1:])
@@ -104,23 +105,19 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
     :param is_country: Boolean to indicate if processing country files
     :param metric_names: List of metric names to compute and output
     """
+    # For bitcoin, prefer the _without_tor variant if it exists; skip the regular bitcoin file in that case
+    file_type = 'countries' if is_country else 'organizations'
+    without_tor_path = output_dir / f"{file_type}_bitcoin_without_tor.csv"
+    skip_regular_bitcoin = without_tor_path.exists()
+
     csv_files = sorted(output_dir.glob(file_pattern))
     
     for csv_path in csv_files:
-        # Skip _without_tor files in the glob - we'll handle them explicitly for bitcoin
-        if '_without_tor' in csv_path.name:
+        if csv_path.name == f"{file_type}_bitcoin.csv" and skip_regular_bitcoin:
             continue
             
         try:
             ledger = get_ledger_name(csv_path)
-            
-            # For bitcoin, check if _without_tor version exists and use that instead
-            if ledger == 'bitcoin':
-                file_type = 'countries' if is_country else 'organizations'
-                without_tor_path = output_dir / f"{file_type}_bitcoin_without_tor.csv"
-                if without_tor_path.exists():
-                    csv_path = without_tor_path
-            
             date, distribution = read_csv_data(csv_path)
             metrics = compute_metrics(distribution, metric_names)
             

From 1061ea4ae4f90f98990c64635cc264c636d98530 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:04:37 +0000
Subject: [PATCH 25/42] Refactor compute_metrics to use
 compute_concentration_ratio

---
 bitcoin/compute_metrics.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 75b739c..ab97205 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -7,7 +7,7 @@
 import csv
 import pathlib
 import sys
-from network_decentralization.helper import get_config_data, get_metrics_network, get_metrics_geo
+from network_decentralization.helper import get_metrics_network, get_metrics_geo
 from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
 from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
 from network_decentralization.metrics.entropy import compute_entropy
@@ -76,7 +76,7 @@ def compute_metrics(distribution, metric_names):
         'HHI': ('hhi', compute_hhi),
         'Nakamoto': ('nakamoto', compute_nakamoto_coefficient),
         'Entropy': ('entropy', lambda d: compute_entropy(d, alpha=1)),
-        'Max Power Ratio': ('max_power_ratio', lambda d: max(d) / total if d else 0)
+        'Max Power Ratio': ('max_power_ratio', lambda d: compute_concentration_ratio(d, topn=1))
     }
     
     if total == 0:

From 4ffcd558b3281daecd5c4ee3ce66f11968b2523e Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:19:08 +0000
Subject: [PATCH 26/42] Concentration ratio's top-N is now a parameter

---
 bitcoin/compute_metrics.py | 76 +++++++++++++++++++++++++-------------
 1 file changed, 51 insertions(+), 25 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index ab97205..887c538 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -7,7 +7,7 @@
 import csv
 import pathlib
 import sys
-from network_decentralization.helper import get_metrics_network, get_metrics_geo
+from network_decentralization.helper import get_metrics_network, get_metrics_geo, get_concentration_ratio_topn
 from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
 from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
 from network_decentralization.metrics.entropy import compute_entropy
@@ -60,29 +60,37 @@ def get_ledger_name(csv_path):
     return '_'.join(parts[1:])
 
 
-def compute_metrics(distribution, metric_names):
+def compute_metrics(distribution, metric_names, concentration_ratio_topn):
     """
     Compute specified metrics for a given distribution.
     
     :param distribution: Sorted list of entity counts (descending order)
     :param metric_names: List of metric names to compute (e.g., ['HHI', 'Nakamoto', 'Entropy'])
+    :param concentration_ratio_topn: list of top-N parameters for concentration ratio metrics
     :return: Dictionary with computed metric values
     """
-    total = sum(distribution)
     metrics = {}
-    
+
     # Mapping of metric display names to computation functions
+    # Concentration Ratio is handled separately because one metric name expands to multiple outputs (one per configured top-N value).
     metric_map = {
         'HHI': ('hhi', compute_hhi),
         'Nakamoto': ('nakamoto', compute_nakamoto_coefficient),
         'Entropy': ('entropy', lambda d: compute_entropy(d, alpha=1)),
-        'Max Power Ratio': ('max_power_ratio', lambda d: compute_concentration_ratio(d, topn=1))
     }
     
-    if total == 0:
-        return {metric_map[name][0]: None for name in metric_names if name in metric_map}
-    
     for metric_name in metric_names:
+        # Keep legacy 'Max Power Ratio' as an alias so older configs still work.
+        if metric_name in ('Concentration Ratio', 'Max Power Ratio'):
+            for topn in concentration_ratio_topn:
+                key = f"concentration_ratio_top_{topn}"
+                try:
+                    metrics[key] = compute_concentration_ratio(distribution, topn=topn)
+                except Exception as e:
+                    print(f"Error computing {metric_name} (topn={topn}): {e}", file=sys.stderr)
+                    metrics[key] = None
+            continue
+
         if metric_name in metric_map:
             key, func = metric_map[metric_name]
             try:
@@ -94,7 +102,7 @@ def compute_metrics(distribution, metric_names):
     return metrics
 
 
-def process_csv_files(output_dir, file_pattern, is_country, metric_names):
+def process_csv_files(output_dir, file_pattern, is_country, metric_names, concentration_ratio_topn):
     """
     Process all CSV files matching a pattern and output metrics.
     Appends results to existing files or creates new ones.
@@ -104,6 +112,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
     :param file_pattern: Glob pattern for CSV files (e.g., 'organizations_*.csv')
     :param is_country: Boolean to indicate if processing country files
     :param metric_names: List of metric names to compute and output
+    :param concentration_ratio_topn: list of top-N parameters for concentration ratio metrics
     """
     # For bitcoin, prefer the _without_tor variant if it exists; skip the regular bitcoin file in that case
     file_type = 'countries' if is_country else 'organizations'
@@ -119,7 +128,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
         try:
             ledger = get_ledger_name(csv_path)
             date, distribution = read_csv_data(csv_path)
-            metrics = compute_metrics(distribution, metric_names)
+            metrics = compute_metrics(distribution, metric_names, concentration_ratio_topn)
             
             # Determine output filename and metric column mapping
             file_type = 'countries' if is_country else 'organizations'
@@ -127,16 +136,21 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
             output_path = output_dir / output_filename
             file_exists = output_path.exists()
             
-            # Map display names to internal keys for column ordering
-            metric_key_map = {
-                'HHI': 'hhi',
-                'Nakamoto': 'nakamoto',
-                'Entropy': 'entropy',
-                'Max Power Ratio': 'max_power_ratio'
-            }
-            
-            # Build header from metric names
-            header = ['ledger', 'date', 'clustering'] + metric_names
+            # Build output metric columns from selected metrics.
+            metric_columns = []
+            for metric_name in metric_names:
+                # Keep legacy 'Max Power Ratio' as an alias so older configs still work.
+                if metric_name in ('Concentration Ratio', 'Max Power Ratio'):
+                    for topn in concentration_ratio_topn:
+                        metric_columns.append((f"Concentration Ratio (Top {topn})", f"concentration_ratio_top_{topn}"))
+                elif metric_name == 'HHI':
+                    metric_columns.append(('HHI', 'hhi'))
+                elif metric_name == 'Nakamoto':
+                    metric_columns.append(('Nakamoto', 'nakamoto'))
+                elif metric_name == 'Entropy':
+                    metric_columns.append(('Entropy', 'entropy'))
+
+            header = ['ledger', 'date', 'clustering'] + [column[0] for column in metric_columns]
             
             # Write header and data (append if exists)
             with open(output_path, 'a', newline='', encoding='utf-8') as f:
@@ -146,9 +160,8 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
                 
                 # Build row with metric values in the same order as header
                 row = [ledger, date, 'False']
-                for metric_display_name in metric_names:
-                    metric_key = metric_key_map.get(metric_display_name)
-                    value = metrics.get(metric_key) if metric_key else None
+                for _, metric_key in metric_columns:
+                    value = metrics.get(metric_key)
                     if value is None:
                         row.append('')
                     elif isinstance(value, float):
@@ -173,6 +186,7 @@ def main():
     # Load metric names from config using helper functions
     network_metrics = get_metrics_network()
     geo_metrics = get_metrics_geo()
+    concentration_ratio_topn = get_concentration_ratio_topn()
     
     output_dir = pathlib.Path(__file__).parent / 'output'
     
@@ -180,10 +194,22 @@ def main():
         print(f"Error: Output directory not found at {output_dir}", file=sys.stderr)
         sys.exit(1)
     # Process organization files with network metrics
-    process_csv_files(output_dir, 'organizations_*.csv', is_country=False, metric_names=network_metrics)
+    process_csv_files(
+        output_dir,
+        'organizations_*.csv',
+        is_country=False,
+        metric_names=network_metrics,
+        concentration_ratio_topn=concentration_ratio_topn,
+    )
     
     # Process country files with geo metrics
-    process_csv_files(output_dir, 'countries_*.csv', is_country=True, metric_names=geo_metrics)
+    process_csv_files(
+        output_dir,
+        'countries_*.csv',
+        is_country=True,
+        metric_names=geo_metrics,
+        concentration_ratio_topn=concentration_ratio_topn,
+    )
 
 
 if __name__ == '__main__':

From d69344ec4266a8beaf389edabc42836794727aef Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:19:53 +0000
Subject: [PATCH 27/42] Concentration ratio's top-N is now a parameter

---
 bitcoin/network_decentralization/helper.py | 27 ++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/bitcoin/network_decentralization/helper.py b/bitcoin/network_decentralization/helper.py
index bd49263..e1178d6 100644
--- a/bitcoin/network_decentralization/helper.py
+++ b/bitcoin/network_decentralization/helper.py
@@ -69,7 +69,7 @@ def get_metrics_network():
     Retrieves the list of metrics to compute for network analysis (organizations)
     :returns: a list of metric names to compute
     """
-    return get_config_data().get('network_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Max Power Ratio'])
+    return get_config_data().get('network_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Concentration Ratio'])
 
 
 def get_metrics_geo():
@@ -77,7 +77,30 @@ def get_metrics_geo():
     Retrieves the list of metrics to compute for geographic analysis (countries)
     :returns: a list of metric names to compute
     """
-    return get_config_data().get('geo_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Max Power Ratio'])
+    return get_config_data().get('geo_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Concentration Ratio'])
+
+
+def get_concentration_ratio_topn():
+    """
+    Retrieves top-N values used by concentration-ratio based metrics.
+    :returns: list of unique positive integers in configured order (defaults to [1, 3])
+    """
+    # 'or {}' also covers a 'metrics_parameters' key whose value is None (e.g. an empty config section).
+    params = get_config_data().get('metrics_parameters') or {}
+    raw_topn = params.get('concentration_ratio_topn', [1, 3])
+    if not isinstance(raw_topn, list):
+        raw_topn = [raw_topn]  # a single scalar is treated as a one-element list
+
+    values = []
+    for value in raw_topn:
+        try:
+            parsed = int(value)
+        except (TypeError, ValueError):
+            continue  # entries that cannot be converted to an integer are skipped
+        if parsed > 0 and parsed not in values:  # positive only; duplicates dropped, order kept
+            values.append(parsed)
+
+    return values if values else [1, 3]
 
 
 def get_output_directory(ledger=None, dead=False):

From 61a96a563cdf2f1517b0f24c033241a46a5332f3 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:20:19 +0000
Subject: [PATCH 28/42] Concentration ratio's top-N is now a parameter

---
 bitcoin/config.yaml | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/bitcoin/config.yaml b/bitcoin/config.yaml
index 42f8f52..dd3b8be 100644
--- a/bitcoin/config.yaml
+++ b/bitcoin/config.yaml
@@ -29,11 +29,17 @@ network_metrics:
   - HHI
   - Nakamoto
 #  - Entropy
-  - Max Power Ratio
+  - Concentration Ratio
 
 # Metrics for geographic analysis (countries)
 geo_metrics:
   - HHI
   - Nakamoto
   - Entropy
-  - Max Power Ratio
+  - Concentration Ratio
+
+# Parameters for metric computation
+metrics_parameters:
+  concentration_ratio_topn:
+    - 1
+    - 3

From 7860933bd79efe5c9904a33816e07c48e4bf5a25 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:49:44 +0000
Subject: [PATCH 29/42] Add function to get ledgers without Tor parameter

---
 bitcoin/network_decentralization/helper.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/bitcoin/network_decentralization/helper.py b/bitcoin/network_decentralization/helper.py
index e1178d6..0b80686 100644
--- a/bitcoin/network_decentralization/helper.py
+++ b/bitcoin/network_decentralization/helper.py
@@ -103,6 +103,24 @@ def get_concentration_ratio_topn():
     return values if values else [1, 3]
 
 
+def get_without_tor_ledgers():
+    """
+    Retrieves the target ledgers for generating *_without_tor CSV files.
+    :returns: list of unique, stripped ledger names in configured order, or None when none are configured
+    """
+    # 'or {}' also covers a 'parse_parameters' key whose value is None (e.g. an empty config section).
+    params = get_config_data().get('parse_parameters') or {}
+    raw_ledgers = params.get('without_tor_ledgers')
+    if raw_ledgers is None:
+        return None
+
+    # A single scalar value is treated as a one-element list.
+    if not isinstance(raw_ledgers, list):
+        raw_ledgers = [raw_ledgers]
+    ledgers = [ledger.strip() for ledger in raw_ledgers if isinstance(ledger, str) and ledger.strip()]  # non-strings and blank names are dropped
+    return list(dict.fromkeys(ledgers)) or None  # dict.fromkeys removes duplicates while keeping order
+
+
 def get_output_directory(ledger=None, dead=False):
     """
     Reads the config file and retrieves the output directory

From 74672c158a00054d27d42570060618fef98d2052 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:50:10 +0000
Subject: [PATCH 30/42] Add parse_parameters for ledgers without Tor

---
 bitcoin/config.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/bitcoin/config.yaml b/bitcoin/config.yaml
index dd3b8be..9490e31 100644
--- a/bitcoin/config.yaml
+++ b/bitcoin/config.yaml
@@ -43,3 +43,8 @@ metrics_parameters:
   concentration_ratio_topn:
     - 1
     - 3
+
+# Parameters for parsing/output generation
+parse_parameters:
+  without_tor_ledgers:
+    - bitcoin

From 2a5d15f43536ee1d58c419242e3df01158c5be4c Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:51:09 +0000
Subject: [PATCH 31/42] Ledgers without Tor are now parameters

---
 bitcoin/parse.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/bitcoin/parse.py b/bitcoin/parse.py
index 7fa39c2..4793020 100644
--- a/bitcoin/parse.py
+++ b/bitcoin/parse.py
@@ -350,16 +350,16 @@ def redistribute_tor_nodes(name, ledger, df, mode):
     df_without_tor[[mode, date]].to_csv(f'./output/{name}_{ledger}_without_tor.csv', index=False)  # save the updated DataFrame to a new CSV
 
 
-def without_tor():
+def create_without_tor_files(ledger):
     """
-    Loads a CSV file and calls the redistribute_tor_nodes function.
+    Loads CSV files for the given ledger and redistributes Tor nodes.
+    :param ledger: the ledger to process for *_without_tor output files
     """
-    ledger = 'bitcoin'
     modes = ['Countries', 'Organizations']
     for mode in modes:
         logging.info(f'parse.py: Removing Tor from {ledger} {mode}')
         name = mode.lower()
-        filename = pathlib.Path(f'./output/{name}_{ledger}.csv')
+        filename = Path(f'./output/{name}_{ledger}.csv')
         if not filename.is_file():
             logging.warning(f"File not found: {filename}")
             return None
@@ -420,6 +420,7 @@ def cluster_organizations(ledger):
 
 def main():
     logging.info('Start parsing')
+    without_tor_ledgers = set(hlp.get_without_tor_ledgers() or [])
 
     reachable_nodes = {}
     for ledger in LEDGERS:
@@ -429,8 +430,8 @@ def main():
             geography(reachable_nodes, ledger, mode)
         if 'Organizations' in MODES:
             cluster_organizations(ledger)
-        if 'bitcoin' in LEDGERS:
-            without_tor()
+        if ledger in without_tor_ledgers:
+            create_without_tor_files(ledger)
 
 if __name__ == '__main__':
     main()

From b641976bd16f6990f8905cc3391d5361e9073fde Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 14:54:11 +0000
Subject: [PATCH 32/42] Rename parameter

---
 bitcoin/parse.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/bitcoin/parse.py b/bitcoin/parse.py
index 4793020..90f86fd 100644
--- a/bitcoin/parse.py
+++ b/bitcoin/parse.py
@@ -327,10 +327,10 @@ def version(reachable_nodes, mode):
             versions_df.to_csv(f'./output/{name.lower()}_{ledger}.csv', index_label = name)
 
 
-def redistribute_tor_nodes(name, ledger, df, mode):
+def redistribute_tor_nodes(mode_lower, ledger, df, mode):
     """
     Redistributes Tor node count proportionally across non-Tor rows.
-    :param name: lowercase version of the mode ('countries' or 'organizations') used in file naming.
+    :param mode_lower: lowercase version of mode ('countries' or 'organizations') used in file naming.
     :param ledger: the ledger name.
     :param df: the dataframe in which the Tor nodes must be reditributed.
     :param mode: the mode name (e.g., 'Countries', 'Organizations').
@@ -347,7 +347,7 @@ def redistribute_tor_nodes(name, ledger, df, mode):
     df['Distribution'] = df.apply(lambda row: round((row[f'{date}'] / number_of_total_nodes_without_tor) * number_of_tor_nodes) if row[f'{mode}'] != 'Tor' else 0, axis=1)  # create a new column 'Distribution' that distributes the Tor nodes proportionally to non-Tor rows
     df[date] = df[date] + df['Distribution']
     df_without_tor = df[df[f'{mode}'] != 'Tor']  # filter out the Tor row
-    df_without_tor[[mode, date]].to_csv(f'./output/{name}_{ledger}_without_tor.csv', index=False)  # save the updated DataFrame to a new CSV
+    df_without_tor[[mode, date]].to_csv(f'./output/{mode_lower}_{ledger}_without_tor.csv', index=False)  # save the updated DataFrame to a new CSV
 
 
 def create_without_tor_files(ledger):
@@ -358,13 +358,13 @@ def create_without_tor_files(ledger):
     modes = ['Countries', 'Organizations']
     for mode in modes:
         logging.info(f'parse.py: Removing Tor from {ledger} {mode}')
-        name = mode.lower()
-        filename = Path(f'./output/{name}_{ledger}.csv')
+        mode_lower = mode.lower()
+        filename = Path(f'./output/{mode_lower}_{ledger}.csv')
         if not filename.is_file():
             logging.warning(f"File not found: {filename}")
             return None
         df = pd.read_csv(filename)
-        redistribute_tor_nodes(name, ledger, df, mode)
+        redistribute_tor_nodes(mode_lower, ledger, df, mode)
 
 
 def cluster_organizations(ledger):

From 40590a591bf3f56a7f053ad302c6d1176f2d2c7f Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 10 Mar 2026 15:10:51 +0000
Subject: [PATCH 33/42] Update automation.sh

---
 bitcoin/automation.sh | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/bitcoin/automation.sh b/bitcoin/automation.sh
index 995e2f2..371b4da 100644
--- a/bitcoin/automation.sh
+++ b/bitcoin/automation.sh
@@ -8,14 +8,13 @@ do
 python3 crawl.py # comment this line if new data must not be gathered
 python3 cleanup_dead_nodes.py
 python3 collect_geodata.py
-#python3 collect_osdata.py # not in use
 python3 parse.py
 python3 plot.py
 python3 compute_metrics.py
 
 # The following 2 lines create a folder and move all png and csv files to it
 mkdir output/"$(date +%Y-%m-%d)"
-mv -t output/"$(date +%Y-%m-%d)" output/{clients,countries,protocols,organizations,ip_type}_*.csv output/response_length.json 2>/dev/null || true
+mv -t output/"$(date +%Y-%m-%d)" output/{clients,countries,protocols,organizations,ip}_*.csv output/response_length.json 2>/dev/null || true
 
 sleep 7d # will repeat the whole process every X days
 

From a995b9de68a377c2477a8d9ca9c267163a49d498 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 13 Mar 2026 12:31:22 +0000
Subject: [PATCH 34/42] Revert "Unused metrics"

This reverts commit 517fdf8d4f8b257b1073d537073f719fabd33e7f.
---
 bitcoin/network_decentralization/metrics/total_entities.py | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 bitcoin/network_decentralization/metrics/total_entities.py

diff --git a/bitcoin/network_decentralization/metrics/total_entities.py b/bitcoin/network_decentralization/metrics/total_entities.py
new file mode 100644
index 0000000..0a2e82b
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/total_entities.py
@@ -0,0 +1,7 @@
+def compute_total_entities(distribution):
+    """
+    Counts how many entities in the given distribution hold a strictly positive amount.
+    :param distribution: list of non-negative counts per entity
+    :returns: int, the number of entries whose count is greater than zero
+    """
+    return sum(1 for count in distribution if count > 0)

From f73591e27c9fa214fad019f1cdb1e84f0df5b6f4 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Fri, 13 Mar 2026 12:31:37 +0000
Subject: [PATCH 35/42] Revert "Unused metrics"

This reverts commit bc1db11ef88858be3d22b1a445f9790b11a19ec9.
---
 .../metrics/tau_index.py                       | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 bitcoin/network_decentralization/metrics/tau_index.py

diff --git a/bitcoin/network_decentralization/metrics/tau_index.py b/bitcoin/network_decentralization/metrics/tau_index.py
new file mode 100644
index 0000000..17edd5d
--- /dev/null
+++ b/bitcoin/network_decentralization/metrics/tau_index.py
@@ -0,0 +1,18 @@
+def compute_tau_index(distribution, threshold):
+    """
+    Calculates the tau-decentralization index: the number of top entities needed for their combined share to reach the threshold.
+    :param distribution: list of non-negative counts per entity, sorted in descending order
+    :param threshold: float, the power ratio that must be covered by the counted entities (e.g. 0.66 for 66%)
+    :returns: int that corresponds to the tau index of the given distribution, or None if total is 0
+    """
+    total = sum(distribution)
+    if total == 0:
+        return None
+    tau_index, amount_covered = 0, 0
+    for amount in distribution:
+        # Accumulate raw counts and divide once per check: summing per-entity fractions drifts (eight 0.1 shares add up to 0.7999999999999999 < 0.8).
+        if amount_covered / total >= threshold:
+            break
+        tau_index += 1
+        amount_covered += amount
+    return tau_index

From a3a1d908cc003d33ea6bea53ba6628cbbbb9c040 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Mon, 16 Mar 2026 11:41:49 +0000
Subject: [PATCH 36/42] Update automation.sh

---
 bitcoin/automation.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitcoin/automation.sh b/bitcoin/automation.sh
index 371b4da..2c58ebc 100644
--- a/bitcoin/automation.sh
+++ b/bitcoin/automation.sh
@@ -14,7 +14,7 @@ python3 compute_metrics.py
 
 # The following 2 lines create a folder and move all png and csv files to it
 mkdir output/"$(date +%Y-%m-%d)"
-mv -t output/"$(date +%Y-%m-%d)" output/{clients,countries,protocols,organizations,ip}_*.csv output/response_length.json 2>/dev/null || true
+mv -t output/"$(date +%Y-%m-%d)" output/{clients,countries,protocols,organizations,ip,discovery,peerstore}*.csv output/response_length.json output/*.png 2>/dev/null || true
 
 sleep 7d # will repeat the whole process every X days
 

From 2bccfdd0bce5fbf51a52cbc68a78b655c78dcbb5 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 17 Mar 2026 10:25:26 +0000
Subject: [PATCH 37/42] Remove 'max power ratio' occurrences

---
 bitcoin/README.md          | 2 +-
 bitcoin/compute_metrics.py | 6 ++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/bitcoin/README.md b/bitcoin/README.md
index a7a02e5..e319f18 100644
--- a/bitcoin/README.md
+++ b/bitcoin/README.md
@@ -29,7 +29,7 @@ This component of the project analyses the decentralisation of Bitcoin, Bitcoin
   Processes raw data (e.g., logs from crawling) into structured formats (JSON, CSV) for easier analysis and plotting.
 
 - **`compute_metrics.py`**  
-  Computes network decentralisation metrics (HHI, Nakamoto coefficient, entropy, max power ratio) from CSV files.
+  Computes network decentralisation metrics (HHI, Nakamoto coefficient, entropy, concentration ratios) from CSV files.
 
 - **`plot.py`**  
   Generates data visualisations.
diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 887c538..6be5cb0 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -80,8 +80,7 @@ def compute_metrics(distribution, metric_names, concentration_ratio_topn):
     }
     
     for metric_name in metric_names:
-        # Keep legacy 'Max Power Ratio' as an alias so older configs still work.
-        if metric_name in ('Concentration Ratio', 'Max Power Ratio'):
+        if metric_name == 'Concentration Ratio':
             for topn in concentration_ratio_topn:
                 key = f"concentration_ratio_top_{topn}"
                 try:
@@ -139,8 +138,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
             # Build output metric columns from selected metrics.
             metric_columns = []
             for metric_name in metric_names:
-                # Keep legacy 'Max Power Ratio' as an alias so older configs still work.
-                if metric_name in ('Concentration Ratio', 'Max Power Ratio'):
+                if metric_name == 'Concentration Ratio':
                     for topn in concentration_ratio_topn:
                         metric_columns.append((f"Concentration Ratio (Top {topn})", f"concentration_ratio_top_{topn}"))
                 elif metric_name == 'HHI':

From 8e6431bcbb654c5954c9e46564f8129f28f9f7b6 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 17 Mar 2026 10:35:14 +0000
Subject: [PATCH 38/42] Update compute_metrics.py

---
 bitcoin/compute_metrics.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 6be5cb0..92629b3 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -141,12 +141,8 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
                 if metric_name == 'Concentration Ratio':
                     for topn in concentration_ratio_topn:
                         metric_columns.append((f"Concentration Ratio (Top {topn})", f"concentration_ratio_top_{topn}"))
-                elif metric_name == 'HHI':
-                    metric_columns.append(('HHI', 'hhi'))
-                elif metric_name == 'Nakamoto':
-                    metric_columns.append(('Nakamoto', 'nakamoto'))
-                elif metric_name == 'Entropy':
-                    metric_columns.append(('Entropy', 'entropy'))
+                else:
+                    metric_columns.append((metric_name, metric_name.lower()))
 
             header = ['ledger', 'date', 'clustering'] + [column[0] for column in metric_columns]
             

From f23044569252826ef4a7ed8f9d1205543e63e0dd Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Tue, 17 Mar 2026 11:13:21 +0000
Subject: [PATCH 39/42] Entropy function's alpha now configurable

---
 bitcoin/compute_metrics.py                 | 29 ++++++++++++++++++----
 bitcoin/config.yaml                        |  2 ++
 bitcoin/network_decentralization/helper.py | 23 +++++++++++++++++
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 92629b3..455b92e 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -7,7 +7,7 @@
 import csv
 import pathlib
 import sys
-from network_decentralization.helper import get_metrics_network, get_metrics_geo, get_concentration_ratio_topn
+from network_decentralization.helper import get_metrics_network, get_metrics_geo, get_concentration_ratio_topn, get_entropy_alphas
 from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
 from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
 from network_decentralization.metrics.entropy import compute_entropy
@@ -60,13 +60,14 @@ def get_ledger_name(csv_path):
     return '_'.join(parts[1:])
 
 
-def compute_metrics(distribution, metric_names, concentration_ratio_topn):
+def compute_metrics(distribution, metric_names, concentration_ratio_topn, entropy_alphas):
     """
     Compute specified metrics for a given distribution.
     
     :param distribution: Sorted list of entity counts (descending order)
     :param metric_names: List of metric names to compute (e.g., ['HHI', 'Nakamoto', 'Entropy'])
     :param concentration_ratio_topn: list of top-N parameters for concentration ratio metrics
+    :param entropy_alphas: alpha parameters used for entropy metric computation
     :return: Dictionary with computed metric values
     """
     metrics = {}
@@ -76,7 +77,6 @@ def compute_metrics(distribution, metric_names, concentration_ratio_topn):
     metric_map = {
         'HHI': ('hhi', compute_hhi),
         'Nakamoto': ('nakamoto', compute_nakamoto_coefficient),
-        'Entropy': ('entropy', lambda d: compute_entropy(d, alpha=1)),
     }
     
     for metric_name in metric_names:
@@ -90,6 +90,17 @@ def compute_metrics(distribution, metric_names, concentration_ratio_topn):
                     metrics[key] = None
             continue
 
+        if metric_name == 'Entropy':
+            for alpha in entropy_alphas:
+                alpha_str = f"{alpha:g}"
+                key = f"entropy_alpha_{alpha_str}"
+                try:
+                    metrics[key] = compute_entropy(distribution, alpha=alpha)
+                except Exception as e:
+                    print(f"Error computing {metric_name} (alpha={alpha_str}): {e}", file=sys.stderr)
+                    metrics[key] = None
+            continue
+
         if metric_name in metric_map:
             key, func = metric_map[metric_name]
             try:
@@ -101,7 +112,7 @@ def compute_metrics(distribution, metric_names, concentration_ratio_topn):
     return metrics
 
 
-def process_csv_files(output_dir, file_pattern, is_country, metric_names, concentration_ratio_topn):
+def process_csv_files(output_dir, file_pattern, is_country, metric_names, concentration_ratio_topn, entropy_alphas):
     """
     Process all CSV files matching a pattern and output metrics.
     Appends results to existing files or creates new ones.
@@ -112,6 +123,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
     :param is_country: Boolean to indicate if processing country files
     :param metric_names: List of metric names to compute and output
     :param concentration_ratio_topn: list of top-N parameters for concentration ratio metrics
+    :param entropy_alphas: alpha parameters used for entropy metric computation
     """
     # For bitcoin, prefer the _without_tor variant if it exists; skip the regular bitcoin file in that case
     file_type = 'countries' if is_country else 'organizations'
@@ -127,7 +139,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
         try:
             ledger = get_ledger_name(csv_path)
             date, distribution = read_csv_data(csv_path)
-            metrics = compute_metrics(distribution, metric_names, concentration_ratio_topn)
+            metrics = compute_metrics(distribution, metric_names, concentration_ratio_topn, entropy_alphas)
             
             # Determine output filename and metric column mapping
             file_type = 'countries' if is_country else 'organizations'
@@ -141,6 +153,10 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
                 if metric_name == 'Concentration Ratio':
                     for topn in concentration_ratio_topn:
                         metric_columns.append((f"Concentration Ratio (Top {topn})", f"concentration_ratio_top_{topn}"))
+                elif metric_name == 'Entropy':
+                    for alpha in entropy_alphas:
+                        alpha_str = f"{alpha:g}"
+                        metric_columns.append((f"Entropy (alpha={alpha_str})", f"entropy_alpha_{alpha_str}"))
                 else:
                     metric_columns.append((metric_name, metric_name.lower()))
 
@@ -181,6 +197,7 @@ def main():
     network_metrics = get_metrics_network()
     geo_metrics = get_metrics_geo()
     concentration_ratio_topn = get_concentration_ratio_topn()
+    entropy_alphas = get_entropy_alphas()
     
     output_dir = pathlib.Path(__file__).parent / 'output'
     
@@ -194,6 +211,7 @@ def main():
         is_country=False,
         metric_names=network_metrics,
         concentration_ratio_topn=concentration_ratio_topn,
+        entropy_alphas=entropy_alphas,
     )
     
     # Process country files with geo metrics
@@ -203,6 +221,7 @@ def main():
         is_country=True,
         metric_names=geo_metrics,
         concentration_ratio_topn=concentration_ratio_topn,
+        entropy_alphas=entropy_alphas,
     )
 
 
diff --git a/bitcoin/config.yaml b/bitcoin/config.yaml
index 9490e31..ec7c83e 100644
--- a/bitcoin/config.yaml
+++ b/bitcoin/config.yaml
@@ -43,6 +43,8 @@ metrics_parameters:
   concentration_ratio_topn:
     - 1
     - 3
+  entropy_alpha:
+    - 1
 
 # Parameters for parsing/output generation
 parse_parameters:
diff --git a/bitcoin/network_decentralization/helper.py b/bitcoin/network_decentralization/helper.py
index 0b80686..8544a81 100644
--- a/bitcoin/network_decentralization/helper.py
+++ b/bitcoin/network_decentralization/helper.py
@@ -103,6 +103,29 @@ def get_concentration_ratio_topn():
     return values if values else [1, 3]
 
 
+def get_entropy_alphas():
+    """
+    Retrieves alpha values used by entropy metrics.
+    :returns: list of unique floats (defaults to [1])
+    """
+    params = get_config_data().get('metrics_parameters', {})
+    raw_alphas = params.get('entropy_alpha', [1])
+
+    if not isinstance(raw_alphas, list):
+        raw_alphas = [raw_alphas]
+
+    values = []
+    for value in raw_alphas:
+        try:
+            parsed = float(value)
+            if parsed not in values:
+                values.append(parsed)
+        except (TypeError, ValueError):
+            continue
+
+    return values if values else [1]
+
+
 def get_without_tor_ledgers():
     """
     Retrieves the target ledgers for generating *_without_tor CSV files.

From 0061c14faf724a1808d964e125c44dfc9095490d Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:17:49 +0000
Subject: [PATCH 40/42] Metrics now loaded as a dictionary from config.yaml

---
 bitcoin/compute_metrics.py                 | 145 +++++++++++----------
 bitcoin/config.yaml                        |  29 ++---
 bitcoin/network_decentralization/helper.py | 101 +++++++-------
 3 files changed, 145 insertions(+), 130 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 455b92e..108d31a 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -7,7 +7,9 @@
 import csv
 import pathlib
 import sys
-from network_decentralization.helper import get_metrics_network, get_metrics_geo, get_concentration_ratio_topn, get_entropy_alphas
+from ast import literal_eval
+
+from network_decentralization.helper import get_metrics_network, get_metrics_geo
 from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
 from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
 from network_decentralization.metrics.entropy import compute_entropy
@@ -60,59 +62,89 @@ def get_ledger_name(csv_path):
     return '_'.join(parts[1:])
 
 
-def compute_metrics(distribution, metric_names, concentration_ratio_topn, entropy_alphas):
+def normalize_metric_name(metric_name):
+    """Normalizes metric names from config into registry keys."""
+    if metric_name is None:
+        return ''
+    return str(metric_name).strip().lower().replace('-', '_').replace(' ', '_')
+
+
+def parse_metric_spec(metric_spec):
+    """Parses metric token strings like 'entropy=1' into (token, name, parameter)."""
+    token = str(metric_spec).strip()
+    if not token:
+        return None
+
+    if '=' not in token:
+        return token, normalize_metric_name(token), None
+
+    raw_name, raw_parameter = token.split('=', 1)
+    normalized_name = normalize_metric_name(raw_name)
+    parameter_text = raw_parameter.strip()
+    parameter_value = parse_metric_parameter(parameter_text)
+    return token, normalized_name, parameter_value
+
+
+def parse_metric_parameter(parameter_text):
+    """Parses metric parameter values from config strings into Python values."""
+    if parameter_text is None:
+        return None
+
+    text = str(parameter_text).strip()
+    if not text:
+        return None
+
+    try:
+        return literal_eval(text)
+    except (ValueError, SyntaxError):
+        return text
+
+
+def build_metric_columns(metric_specs):
+    """
+    Builds ordered metric specs from configured metric tokens.
+    :param metric_specs: list of metric tokens (e.g., ['hhi', 'entropy=1'])
+    :returns: list of tuples (metric_token, metric_name, parameter_value)
+    """
+    columns = []
+    for metric_spec in metric_specs:
+        parsed = parse_metric_spec(metric_spec)
+        if parsed is None:
+            continue
+
+        metric_token, metric_name, parameter_value = parsed
+        columns.append((metric_token, metric_name, parameter_value))
+
+    return columns
+
+
+def compute_metrics(distribution, metric_columns):
     """
     Compute specified metrics for a given distribution.
     
     :param distribution: Sorted list of entity counts (descending order)
-    :param metric_names: List of metric names to compute (e.g., ['HHI', 'Nakamoto', 'Entropy'])
-    :param concentration_ratio_topn: list of top-N parameters for concentration ratio metrics
-    :param entropy_alphas: alpha parameters used for entropy metric computation
+    :param metric_columns: list of tuples (metric_token, metric_name, parameter_value)
     :return: Dictionary with computed metric values
     """
     metrics = {}
 
-    # Mapping of metric display names to computation functions
-    # Concentration Ratio is handled separately because one metric name expands to multiple outputs (one per configured top-N value).
-    metric_map = {
-        'HHI': ('hhi', compute_hhi),
-        'Nakamoto': ('nakamoto', compute_nakamoto_coefficient),
-    }
-    
-    for metric_name in metric_names:
-        if metric_name == 'Concentration Ratio':
-            for topn in concentration_ratio_topn:
-                key = f"concentration_ratio_top_{topn}"
-                try:
-                    metrics[key] = compute_concentration_ratio(distribution, topn=topn)
-                except Exception as e:
-                    print(f"Error computing {metric_name} (topn={topn}): {e}", file=sys.stderr)
-                    metrics[key] = None
-            continue
+    for metric_token, metric_name, parameter_value in metric_columns:
+        function_name = f"compute_{metric_name}"
 
-        if metric_name == 'Entropy':
-            for alpha in entropy_alphas:
-                alpha_str = f"{alpha:g}"
-                key = f"entropy_alpha_{alpha_str}"
-                try:
-                    metrics[key] = compute_entropy(distribution, alpha=alpha)
-                except Exception as e:
-                    print(f"Error computing {metric_name} (alpha={alpha_str}): {e}", file=sys.stderr)
-                    metrics[key] = None
-            continue
-
-        if metric_name in metric_map:
-            key, func = metric_map[metric_name]
-            try:
-                metrics[key] = func(distribution)
-            except Exception as e:
-                print(f"Error computing {metric_name}: {e}", file=sys.stderr)
-                metrics[key] = None
+        try:
+            function = globals()[function_name]  # plain name lookup; never evaluate config text as code
+            if parameter_value is None:
+                metrics[metric_token] = function(distribution)
+            else:
+                metrics[metric_token] = function(distribution, parameter_value)
+        except Exception as e:
+            print(f"Error computing {metric_token}: {e}", file=sys.stderr)
+            metrics[metric_token] = None
     
     return metrics
 
 
-def process_csv_files(output_dir, file_pattern, is_country, metric_names, concentration_ratio_topn, entropy_alphas):
+def process_csv_files(output_dir, file_pattern, is_country, metric_names):
     """
     Process all CSV files matching a pattern and output metrics.
     Appends results to existing files or creates new ones.
@@ -122,9 +154,9 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
     :param file_pattern: Glob pattern for CSV files (e.g., 'organizations_*.csv')
     :param is_country: Boolean to indicate if processing country files
     :param metric_names: List of metric names to compute and output
-    :param concentration_ratio_topn: list of top-N parameters for concentration ratio metrics
-    :param entropy_alphas: alpha parameters used for entropy metric computation
     """
+    metric_columns = build_metric_columns(metric_names)
+
     # For bitcoin, prefer the _without_tor variant if it exists; skip the regular bitcoin file in that case
     file_type = 'countries' if is_country else 'organizations'
     without_tor_path = output_dir / f"{file_type}_bitcoin_without_tor.csv"
@@ -139,7 +171,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
         try:
             ledger = get_ledger_name(csv_path)
             date, distribution = read_csv_data(csv_path)
-            metrics = compute_metrics(distribution, metric_names, concentration_ratio_topn, entropy_alphas)
+            metrics = compute_metrics(distribution, metric_columns)
             
             # Determine output filename and metric column mapping
             file_type = 'countries' if is_country else 'organizations'
@@ -147,20 +179,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
             output_path = output_dir / output_filename
             file_exists = output_path.exists()
             
-            # Build output metric columns from selected metrics.
-            metric_columns = []
-            for metric_name in metric_names:
-                if metric_name == 'Concentration Ratio':
-                    for topn in concentration_ratio_topn:
-                        metric_columns.append((f"Concentration Ratio (Top {topn})", f"concentration_ratio_top_{topn}"))
-                elif metric_name == 'Entropy':
-                    for alpha in entropy_alphas:
-                        alpha_str = f"{alpha:g}"
-                        metric_columns.append((f"Entropy (alpha={alpha_str})", f"entropy_alpha_{alpha_str}"))
-                else:
-                    metric_columns.append((metric_name, metric_name.lower()))
-
-            header = ['ledger', 'date', 'clustering'] + [column[0] for column in metric_columns]
+            header = ['ledger', 'date', 'clustering'] + [metric_token for metric_token, _, _ in metric_columns]
             
             # Write header and data (append if exists)
             with open(output_path, 'a', newline='', encoding='utf-8') as f:
@@ -170,8 +189,8 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names, concen
                 
                 # Build row with metric values in the same order as header
                 row = [ledger, date, 'False']
-                for _, metric_key in metric_columns:
-                    value = metrics.get(metric_key)
+                for metric_token, _, _ in metric_columns:
+                    value = metrics.get(metric_token)
                     if value is None:
                         row.append('')
                     elif isinstance(value, float):
@@ -196,8 +215,6 @@ def main():
     # Load metric names from config using helper functions
     network_metrics = get_metrics_network()
     geo_metrics = get_metrics_geo()
-    concentration_ratio_topn = get_concentration_ratio_topn()
-    entropy_alphas = get_entropy_alphas()
     
     output_dir = pathlib.Path(__file__).parent / 'output'
     
@@ -210,8 +227,6 @@ def main():
         'organizations_*.csv',
         is_country=False,
         metric_names=network_metrics,
-        concentration_ratio_topn=concentration_ratio_topn,
-        entropy_alphas=entropy_alphas,
     )
     
     # Process country files with geo metrics
@@ -220,8 +235,6 @@ def main():
         'countries_*.csv',
         is_country=True,
         metric_names=geo_metrics,
-        concentration_ratio_topn=concentration_ratio_topn,
-        entropy_alphas=entropy_alphas,
     )
 
 
diff --git a/bitcoin/config.yaml b/bitcoin/config.yaml
index ec7c83e..4082e88 100644
--- a/bitcoin/config.yaml
+++ b/bitcoin/config.yaml
@@ -10,9 +10,6 @@ mode:
   - Countries
   - Organizations
 
-# Used by distribution.py to know which column to distribute
-date: '2025-03-28'
-
 execution_parameters:
   concurrency: 100
 
@@ -26,27 +23,23 @@ output_directories:
 
 # Metrics for network analysis (organizations)
 network_metrics:
-  - HHI
-  - Nakamoto
-#  - Entropy
-  - Concentration Ratio
+  hhi:
+  nakamoto_coefficient:
+  concentration_ratio:
+    - 1
+    - 3
 
 # Metrics for geographic analysis (countries)
 geo_metrics:
-  - HHI
-  - Nakamoto
-  - Entropy
-  - Concentration Ratio
-
-# Parameters for metric computation
-metrics_parameters:
-  concentration_ratio_topn:
+  hhi:
+  nakamoto_coefficient:
+  entropy:
     - 1
-    - 3
-  entropy_alpha:
+  concentration_ratio:
     - 1
+    - 3
 
 # Parameters for parsing/output generation
 parse_parameters:
   without_tor_ledgers:
-    - bitcoin
+    - bitcoin
\ No newline at end of file
diff --git a/bitcoin/network_decentralization/helper.py b/bitcoin/network_decentralization/helper.py
index 8544a81..dd7d9c3 100644
--- a/bitcoin/network_decentralization/helper.py
+++ b/bitcoin/network_decentralization/helper.py
@@ -42,13 +42,6 @@ def get_mode():
     """
     return get_config_data()['mode']
 
-def get_date():
-    """
-    Retrieves data regarding the date to use
-    :returns: the date to be used by distribution.py
-    """
-    return get_config_data()['date']
-
 def get_active():
     """
     Retrieves data regarding the packets to clean up 
@@ -66,64 +59,80 @@ def get_concurrency():
 
 def get_metrics_network():
     """
-    Retrieves the list of metrics to compute for network analysis (organizations)
-    :returns: a list of metric names to compute
+    Retrieves the list of metrics to compute for network analysis (organizations).
+    Supports either a list (e.g., ['hhi', 'nakamoto_coefficient']) or a dictionary
+    (e.g., {'concentration_ratio': [1, 3]}), which is expanded to tokens like
+    'concentration_ratio=1' and 'concentration_ratio=3'.
+    :returns: a list of metric tokens to compute
     """
-    return get_config_data().get('network_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Concentration Ratio'])
+    default = {
+        'hhi': None,
+        'nakamoto': None,
+        'entropy': None,
+        'concentration_ratio': None,
+    }
+    return _expand_metric_config(get_config_data().get('network_metrics', default), default)
 
 
 def get_metrics_geo():
     """
-    Retrieves the list of metrics to compute for geographic analysis (countries)
-    :returns: a list of metric names to compute
+    Retrieves the list of metrics to compute for geographic analysis (countries).
+    Supports either a list (e.g., ['hhi', 'nakamoto_coefficient']) or a dictionary
+    (e.g., {'concentration_ratio': [1, 3]}), which is expanded to tokens like
+    'concentration_ratio=1' and 'concentration_ratio=3'.
+    :returns: a list of metric tokens to compute
     """
-    return get_config_data().get('geo_metrics', ['HHI', 'Nakamoto', 'Entropy', 'Concentration Ratio'])
+    default = {
+        'hhi': None,
+        'nakamoto': None,
+        'entropy': None,
+        'concentration_ratio': None,
+    }
+    return _expand_metric_config(get_config_data().get('geo_metrics', default), default)
 
 
-def get_concentration_ratio_topn():
+def _expand_metric_config(raw_metrics, default_metrics):
     """
-    Retrieves top-N values used by concentration-ratio based metrics.
-    :returns: list of unique positive integers (defaults to [1, 3])
+    Expands metric configuration into a flat list of metric tokens.
+    Example: {'entropy': [1, 2]} -> ['entropy=1', 'entropy=2']
     """
-    params = get_config_data().get('metrics_parameters', {})
-    raw_topn = params.get('concentration_ratio_topn', [1, 3])
+    metrics = raw_metrics if raw_metrics is not None else default_metrics
 
-    if not isinstance(raw_topn, list):
-        raw_topn = [raw_topn]
+    if isinstance(metrics, list):
+        return [str(metric).strip() for metric in metrics if str(metric).strip()]
 
-    values = []
-    for value in raw_topn:
-        try:
-            parsed = int(value)
-            if parsed > 0 and parsed not in values:
-                values.append(parsed)
-        except (TypeError, ValueError):
-            continue
+    if not isinstance(metrics, dict):
+        return []
 
-    return values if values else [1, 3]
+    expanded = []
+    for metric_name, parameter_values in metrics.items():
+        name = str(metric_name).strip()
+        if not name:
+            continue
 
+        if parameter_values is None:
+            expanded.append(name)
+            continue
 
-def get_entropy_alphas():
-    """
-    Retrieves alpha values used by entropy metrics.
-    :returns: list of unique floats (defaults to [1])
-    """
-    params = get_config_data().get('metrics_parameters', {})
-    raw_alphas = params.get('entropy_alpha', [1])
+        if isinstance(parameter_values, list):
+            values = parameter_values
+        else:
+            values = [parameter_values]
 
-    if not isinstance(raw_alphas, list):
-        raw_alphas = [raw_alphas]
+        unique_values = []
+        for value in values:
+            rendered = None if value is None else str(value).strip()
+            if rendered is not None and rendered not in unique_values:
+                unique_values.append(rendered)
 
-    values = []
-    for value in raw_alphas:
-        try:
-            parsed = float(value)
-            if parsed not in values:
-                values.append(parsed)
-        except (TypeError, ValueError):
+        if not unique_values:
+            expanded.append(name)
             continue
 
-    return values if values else [1]
+        for rendered in unique_values:
+            expanded.append(f"{name}={rendered}")
+
+    return expanded
 
 
 def get_without_tor_ledgers():

From f9e77f5bbbc9861469383cec6425323ceed352c8 Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 15:26:23 +0000
Subject: [PATCH 41/42] Update requirements.txt

---
 bitcoin/requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bitcoin/requirements.txt b/bitcoin/requirements.txt
index 44084dd..20224ba 100644
--- a/bitcoin/requirements.txt
+++ b/bitcoin/requirements.txt
@@ -5,6 +5,6 @@ dnspython>=2.6.1
 PySocks>=1.7.1
 python3-nmap>=1.6.0
 pandas>=2.2.3
+numpy>=1.26
 networkx>=3.1
-scipy>=1.13
 matplotlib>=3.9

From 0d6d67d4dd5b0d96c1c0795270f2cccd1ecee2cd Mon Sep 17 00:00:00 2001
From: LauraAntunes1 <190111637+LauraAntunes1@users.noreply.github.com>
Date: Wed, 18 Mar 2026 16:31:24 +0000
Subject: [PATCH 42/42] No default values for metrics and without_tor not only
 for bitcoin

---
 bitcoin/compute_metrics.py                 | 21 ++++++++++++--------
 bitcoin/network_decentralization/helper.py | 23 +++++++---------------
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/bitcoin/compute_metrics.py b/bitcoin/compute_metrics.py
index 108d31a..8c76f74 100644
--- a/bitcoin/compute_metrics.py
+++ b/bitcoin/compute_metrics.py
@@ -9,7 +9,7 @@
 import sys
 from ast import literal_eval
 
-from network_decentralization.helper import get_metrics_network, get_metrics_geo
+from network_decentralization.helper import get_metrics_network, get_metrics_geo, get_without_tor_ledgers
 from network_decentralization.metrics.herfindahl_hirschman_index import compute_hhi
 from network_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient
 from network_decentralization.metrics.entropy import compute_entropy
@@ -148,7 +148,7 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
     """
     Process all CSV files matching a pattern and output metrics.
     Appends results to existing files or creates new ones.
-    For bitcoin, uses the _without_tor versions if they exist.
+    Uses _without_tor versions when configured in parse_parameters.without_tor_ledgers.
     
     :param output_dir: Path to the output directory
     :param file_pattern: Glob pattern for CSV files (e.g., 'organizations_*.csv')
@@ -156,20 +156,25 @@ def process_csv_files(output_dir, file_pattern, is_country, metric_names):
     :param metric_names: List of metric names to compute and output
     """
     metric_columns = build_metric_columns(metric_names)
+    without_tor_ledgers = set(get_without_tor_ledgers() or [])
 
-    # For bitcoin, prefer the _without_tor variant if it exists; skip the regular bitcoin file in that case
+    # Prefer configured _without_tor variants and skip the corresponding regular file when both exist.
     file_type = 'countries' if is_country else 'organizations'
-    without_tor_path = output_dir / f"{file_type}_bitcoin_without_tor.csv"
-    skip_regular_bitcoin = without_tor_path.exists()
 
     csv_files = sorted(output_dir.glob(file_pattern))
     
     for csv_path in csv_files:
-        if csv_path.name == f"{file_type}_bitcoin.csv" and skip_regular_bitcoin:
-            continue
-            
         try:
             ledger = get_ledger_name(csv_path)
+
+            regular_path = output_dir / f"{file_type}_{ledger}.csv"
+            without_tor_path = output_dir / f"{file_type}_{ledger}_without_tor.csv"
+            is_regular_file = csv_path.name == regular_path.name
+            has_without_tor_variant = without_tor_path.exists()
+
+            if is_regular_file and ledger in without_tor_ledgers and has_without_tor_variant:
+                continue
+
             date, distribution = read_csv_data(csv_path)
             metrics = compute_metrics(distribution, metric_columns)
             
diff --git a/bitcoin/network_decentralization/helper.py b/bitcoin/network_decentralization/helper.py
index dd7d9c3..e2dc9e8 100644
--- a/bitcoin/network_decentralization/helper.py
+++ b/bitcoin/network_decentralization/helper.py
@@ -65,13 +65,7 @@ def get_metrics_network():
     'concentration_ratio=1' and 'concentration_ratio=3'.
     :returns: a list of metric tokens to compute
     """
-    default = {
-        'hhi': None,
-        'nakamoto': None,
-        'entropy': None,
-        'concentration_ratio': None,
-    }
-    return _expand_metric_config(get_config_data().get('network_metrics', default), default)
+    return _expand_metric_config(get_config_data().get('network_metrics'))
 
 
 def get_metrics_geo():
@@ -82,21 +76,18 @@ def get_metrics_geo():
     'concentration_ratio=1' and 'concentration_ratio=3'.
     :returns: a list of metric tokens to compute
     """
-    default = {
-        'hhi': None,
-        'nakamoto': None,
-        'entropy': None,
-        'concentration_ratio': None,
-    }
-    return _expand_metric_config(get_config_data().get('geo_metrics', default), default)
+    return _expand_metric_config(get_config_data().get('geo_metrics'))
 
 
-def _expand_metric_config(raw_metrics, default_metrics):
+def _expand_metric_config(raw_metrics):
     """
     Expands metric configuration into a flat list of metric tokens.
     Example: {'entropy': [1, 2]} -> ['entropy=1', 'entropy=2']
     """
-    metrics = raw_metrics if raw_metrics is not None else default_metrics
+    metrics = raw_metrics
+
+    if metrics is None:
+        return []
 
     if isinstance(metrics, list):
         return [str(metric).strip() for metric in metrics if str(metric).strip()]