-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathHome.py
More file actions
91 lines (85 loc) · 3.66 KB
/
Home.py
File metadata and controls
91 lines (85 loc) · 3.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import streamlit as st
from pathlib import Path
# Session state variables for the Topic Visualization page.
#
# All keys are (re)set to their defaults as soon as any one of them is missing
# from st.session_state; once every key is present the stored values are left
# untouched.
_TOPIC_VIS_DEFAULTS = {
    'p_json_file': None,
    'p_model': None,
    'p_top_n_words': 10,
    'p_n_gram_range': 3,
    'p_min_topic_size': 10,
    'p_n_neighbors': 15,
    'p_min_dist': 0.01,
    'p_n_components': 5,
    'p_min_cluster_size': 10,
    'p_min_samples': 10,
    'p_diversity': 0.75,
    'p_visualization': None,
    'p_vis_plot': None,
    # Previously this block assigned `fig_plotly` (no `p_` prefix) while the
    # guard tested 'p_fig_plotly', so the block never created the key its own
    # guard looks for and the defaults above were re-applied on every run
    # where nothing else had set it.
    'p_fig_plotly': None,
    'p_param_changed': True,
}
if any(key not in st.session_state for key in _TOPIC_VIS_DEFAULTS):
    for key, default in _TOPIC_VIS_DEFAULTS.items():
        st.session_state[key] = default
    # NOTE(review): legacy unprefixed key kept so that any page still reading
    # `st.session_state.fig_plotly` keeps working - confirm nothing uses it,
    # then remove.
    st.session_state.fig_plotly = None
# Session state variables for the File Selection page.
# If even one of the keys below is absent from st.session_state, every key is
# assigned the default listed here.
file_selection_defaults = {
    'p_first_page': 1,
    'p_last_page': 1,
    'p_current_first': 1,
    'p_current_last': 1,
    'p_file_loaded': False,
    'p_range_set': False,
    'p_selected_pdf': None,
    'p_pdf': None,
}
if not all(name in st.session_state for name in file_selection_defaults):
    for name, initial in file_selection_defaults.items():
        st.session_state[name] = initial
# Session state variables for the Text Exploration page.
# A single missing key triggers assignment of all the defaults below.
text_exploration_defaults = {
    'p_dataexp_json_file': None,
    'p_dataexp_docstruc_bins': 50,
    'p_explore': None,
    'p_common_words_bins': 30,
    'p_ngram': 2,
    'p_ngram_cnt': 40,
    'p_entity': "GPE",
    'p_entity_cnt': 20,
}
if not all(name in st.session_state for name in text_exploration_defaults):
    for name, initial in text_exploration_defaults.items():
        st.session_state[name] = initial
def read_markdown_file(markdown_file) -> str:
    """Return the full text of a markdown file.

    Args:
        markdown_file: Location of the file to read, as a string or
            path-like object.

    Returns:
        The file's contents as a single string.

    Raises:
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
        UnicodeDecodeError: If the file's bytes are not valid UTF-8.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform's locale
    # encoding, so the same file reads identically on every operating system.
    return Path(markdown_file).read_text(encoding="utf-8")
# Page-level configuration: title, wide layout, expanded sidebar, and the
# custom "About" / "Get Help" entries passed as menu_items.
about_text = '''
### BERTopic Topic Modelling for Indexers
##### Created by [Don Howes](https://dhindexing.ca)
##### Github repo:
Application released under the [CC BY 4.0](https://creativecommons.org/licenses/by/4.0/) license
'''
page_menu = {
    'About': about_text,
    "Get Help": "https://github.com/DWHowes/BERTopic-Topic-Modeling/blob/main/readme.md",
}
st.set_page_config(
    page_title="Home",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=page_menu,
)
# Load the text of home.md and render it as the page body; HTML inside the
# markdown is passed through because unsafe_allow_html is enabled.
st.markdown(
    read_markdown_file("home.md"),
    unsafe_allow_html=True,
)