Merged
27 changes: 24 additions & 3 deletions scripts/2-process/gcs_process.py
@@ -27,12 +27,24 @@

# Constants
QUARTER = os.path.basename(PATHS["data_quarter"])
FILE_PATHS = [
shared.path_join(PATHS["data_phase"], "gcs_product_totals.csv"),
shared.path_join(PATHS["data_phase"], "gcs_status_combined_totals.csv"),
shared.path_join(PATHS["data_phase"], "gcs_status_lastest_totals.csv"),
shared.path_join(PATHS["data_phase"], "gcs_status_prior_totals.csv"),
shared.path_join(PATHS["data_phase"], "gcs_status_retired_totals.csv"),
shared.path_join(PATHS["data_phase"], "gcs_totals_by_country.csv"),
shared.path_join(PATHS["data_phase"], "gcs_totals_by_free_cultural.csv"),
shared.path_join(PATHS["data_phase"], "gcs_totals_by_language.csv"),
shared.path_join(PATHS["data_phase"], "gcs_totals_by_restrictions.csv"),
]


def parse_arguments():
"""
Parse command-line options, returns parsed argument namespace.
"""
global QUARTER
LOGGER.info("Parsing command-line options")
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
@@ -48,15 +60,23 @@ def parse_arguments():
parser.add_argument(
"--enable-git",
action="store_true",
help="Enable git actions such as fetch, merge, add, commit, and push"
" (default: False)",
help="Enable git actions such as fetch, merge, add, commit, and push",
)
parser.add_argument(
"--force",
action="store_true",
help="Regenerate data even if processed files already exist",
)
args = parser.parse_args()
if not args.enable_save and args.enable_git:
parser.error("--enable-git requires --enable-save")
if args.quarter != QUARTER:
global PATHS
global FILE_PATHS, PATHS
FILE_PATHS = shared.paths_list_update(
LOGGER, FILE_PATHS, QUARTER, args.quarter
)
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
QUARTER = args.quarter
args.logger = LOGGER
args.paths = PATHS
return args
@@ -308,6 +328,7 @@ def main():
args = parse_arguments()
shared.paths_log(LOGGER, PATHS)
shared.git_fetch_and_merge(args, PATHS["repo"])
shared.check_for_data_files(args, FILE_PATHS, QUARTER)

# Count data
file1_count = shared.path_join(PATHS["data_1-fetch"], "gcs_1_count.csv")
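Both shared.paths_list_update() and shared.check_for_data_files() are new helpers in scripts/shared.py, which is outside this diff. Based only on the call sites above and on the per-script check_for_data_file() functions removed further down, a plausible sketch of their behavior (the helper names come from the diff, but the bodies below are assumptions, not the merged implementation) might be:

import os


def paths_list_update(logger, paths_list, old_quarter, new_quarter):
    # Swap the quarter directory component in each pre-built data file path
    logger.info(f"Updating data file paths from {old_quarter} to {new_quarter}")
    return [path.replace(old_quarter, new_quarter) for path in paths_list]


def check_for_data_files(args, file_paths, quarter):
    # Centralized replacement for the per-script check_for_data_file():
    # stop early if processed data already exists, unless --force is given
    if args.force:
        return
    for file_path in file_paths:
        if os.path.exists(file_path):
            # QuantifyingException is the project's existing exception class;
            # exit code 0 is treated as informational by the callers
            raise QuantifyingException(
                f"Processed data already exists for {quarter}", 0
            )

This would also explain the new --force flag added to each parse_arguments() in this change set.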
30 changes: 18 additions & 12 deletions scripts/2-process/github_process.py
@@ -24,12 +24,17 @@

# Constants
QUARTER = os.path.basename(PATHS["data_quarter"])
FILE_PATHS = [
shared.path_join(PATHS["data_phase"], "github_totals_by_license.csv"),
shared.path_join(PATHS["data_phase"], "github_totals_by_restriction.csv"),
]


def parse_arguments():
"""
Parse command-line options, returns parsed argument namespace.
"""
global QUARTER
LOGGER.info("Parsing command-line options")
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
@@ -48,24 +53,27 @@ def parse_arguments():
help="Enable git actions such as fetch, merge, add, commit, and push"
" (default: False)",
)
parser.add_argument(
"--force",
action="store_true",
help="Regenerate data even if processed files already exist",
)

args = parser.parse_args()
if not args.enable_save and args.enable_git:
parser.error("--enable-git requires --enable-save")
if args.quarter != QUARTER:
global PATHS
global FILE_PATHS, PATHS
FILE_PATHS = shared.paths_list_update(
LOGGER, FILE_PATHS, QUARTER, args.quarter
)
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
QUARTER = args.quarter
args.logger = LOGGER
args.paths = PATHS
return args


def check_for_data_file(file_path):
if os.path.exists(file_path):
raise shared.QuantifyingException(
f"Processed data already exists for {QUARTER}", 0
)


def data_to_csv(args, data, file_path):
if not args.enable_save:
return
@@ -98,7 +106,6 @@ def process_totals_by_license(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "github_totals_by_license.csv"
)
check_for_data_file(file_path)
data_to_csv(args, data, file_path)


@@ -133,15 +140,14 @@ def process_totals_by_restriction(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "github_totals_by_restriction.csv"
)
check_for_data_file(file_path)
data_to_csv(args, data, file_path)


def main():
args = parse_arguments()
shared.paths_log(LOGGER, PATHS)
shared.git_fetch_and_merge(args, PATHS["repo"])

shared.check_for_data_files(args, FILE_PATHS, QUARTER)
file_count = shared.path_join(PATHS["data_1-fetch"], "github_1_count.csv")
count_data = shared.open_data_file(
LOGGER, file_count, usecols=["TOOL_IDENTIFIER", "COUNT"]
@@ -167,7 +173,7 @@ def main():
LOGGER.info(e.message)
else:
LOGGER.error(e.message)
sys.exit(e.code)
sys.exit(e.exit_code)
except SystemExit as e:
LOGGER.error(f"System exit with code: {e.code}")
sys.exit(e.code)
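One small fix in the hunk above: the exception handler now calls sys.exit(e.exit_code) instead of sys.exit(e.code), which implies QuantifyingException stores its exit status under exit_code. A minimal sketch consistent with that usage (an assumption; the real class lives in scripts/shared.py and is not shown in this diff):

class QuantifyingException(Exception):
    # Assumed shape only: message is logged by the handlers above, and
    # exit_code feeds sys.exit(); a code of 0 is logged as info, not error
    def __init__(self, message, exit_code=1):
        self.message = message
        self.exit_code = exit_code
        super().__init__(message)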
35 changes: 24 additions & 11 deletions scripts/2-process/wikipedia_process.py
@@ -28,12 +28,24 @@

# Constants
QUARTER = os.path.basename(PATHS["data_quarter"])
FILE_PATHS = [
shared.path_join(
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
),
shared.path_join(
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
),
shared.path_join(
PATHS["data_phase"], "wikipedia_language_representation.csv"
),
]


def parse_arguments():
"""
Parse command-line options, returns parsed argument namespace.
"""
global QUARTER
LOGGER.info("Parsing command-line options")
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
@@ -52,24 +64,27 @@ def parse_arguments():
help="Enable git actions such as fetch, merge, add, commit, and push"
" (default: False)",
)
parser.add_argument(
"--force",
action="store_true",
help="Regenerate data even if processed files already exist",
)

args = parser.parse_args()
if not args.enable_save and args.enable_git:
parser.error("--enable-git requires --enable-save")
if args.quarter != QUARTER:
global PATHS
global FILE_PATHS, PATHS
FILE_PATHS = shared.paths_list_update(
LOGGER, FILE_PATHS, QUARTER, args.quarter
)
PATHS = shared.paths_update(LOGGER, PATHS, QUARTER, args.quarter)
QUARTER = args.quarter
args.logger = LOGGER
args.paths = PATHS
return args


def check_for_data_file(file_path):
if os.path.exists(file_path):
raise shared.QuantifyingException(
f"Processed data already exists for {QUARTER}", 0
)


def data_to_csv(args, data, file_path):
if not args.enable_save:
return
@@ -98,7 +113,6 @@ def process_highest_language_usage(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "wikipedia_highest_language_usage.csv"
)
check_for_data_file(file_path)
data_to_csv(args, top_10, file_path)


@@ -122,7 +136,6 @@ def process_least_language_usage(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "wikipedia_least_language_usage.csv"
)
check_for_data_file(file_path)
data_to_csv(args, bottom_10, file_path)


@@ -149,14 +162,14 @@ def process_language_representation(args, count_data):
file_path = shared.path_join(
PATHS["data_phase"], "wikipedia_language_representation.csv"
)
check_for_data_file(file_path)
data_to_csv(args, language_counts, file_path)


def main():
args = parse_arguments()
shared.paths_log(LOGGER, PATHS)
shared.git_fetch_and_merge(args, PATHS["repo"])
shared.check_for_data_files(args, FILE_PATHS, QUARTER)
file_count = shared.path_join(
PATHS["data_1-fetch"], "wikipedia_count_by_languages.csv"
)
31 changes: 21 additions & 10 deletions scripts/3-report/gcs_report.py
@@ -9,6 +9,7 @@
import sys
import textwrap
import traceback
from pathlib import Path

# Third-party
from pygments import highlight
@@ -27,7 +28,8 @@

# Constants
QUARTER = os.path.basename(PATHS["data_quarter"])
SECTION = "Google Custom Search (GCS)"
SECTION_FILE = Path(__file__).name
SECTION_TITLE = "Google Custom Search (GCS)"


def parse_arguments():
@@ -83,7 +85,8 @@ def gcs_intro(args):
total_count = f"{data['Count'].sum():,d}"
shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
"Overview",
None,
None,
@@ -137,7 +140,8 @@ def plot_products(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing Creative Commons (CC) legal tool product totals and"
@@ -180,7 +184,8 @@ def plot_tool_status(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing Creative Commons (CC) legal tool status totals and"
@@ -223,7 +228,8 @@ def plot_latest_tools(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing latest Creative Commons (CC) legal tool totals and"
@@ -265,7 +271,8 @@ def plot_prior_tools(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing prior Creative Commons (CC) legal tool totals and"
@@ -311,7 +318,8 @@ def plot_retired_tools(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing retired Creative Commons (CC) legal tools total and"
@@ -360,7 +368,8 @@ def plot_countries_highest_usage(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing countries with the highest useage of the latest"
@@ -413,7 +422,8 @@ def plot_languages_highest_usage(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing languages with the highest useage of the latest"
@@ -460,7 +470,8 @@ def plot_free_culture(args):

shared.update_readme(
args,
SECTION,
SECTION_FILE,
SECTION_TITLE,
title,
image_path,
"Plots showing Approved for Free Cultural Works legal tool usage.",
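Throughout gcs_report.py (and presumably the other report scripts not shown here), the single SECTION constant is split into SECTION_FILE and SECTION_TITLE, and both are now passed to shared.update_readme(). The updated helper is not part of this diff; judging only from the call sites above, its leading parameters might look like the following sketch (the parameter names are guesses, and the trailing arguments are left elided because they are truncated in this excerpt):

def update_readme(
    args, section_file, section_title, entry_title, image_path, *rest
):
    # section_file, e.g. "gcs_report.py" from Path(__file__).name, ties the
    # README section to the script that generates it; section_title, e.g.
    # "Google Custom Search (GCS)", remains the human-readable heading that
    # the old SECTION constant supplied; remaining arguments are not shown
    # in this diff excerpt
    ...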