mirror of
https://github.com/alicia-ziying-yang/conTEXT-explorer.git
synced 2022-02-20 22:06:42 +03:00
improve performance
This commit is contained in:
267
app.py
267
app.py
@@ -7,6 +7,7 @@ import dash_uploader as du
|
||||
import uuid
|
||||
import dash_bootstrap_components as dbc
|
||||
from dash.dependencies import Input, Output, State,MATCH, ALL
|
||||
from dash.exceptions import PreventUpdate
|
||||
import dash_table
|
||||
import plotly.graph_objs as go
|
||||
import dash_daq as daq
|
||||
@@ -95,6 +96,7 @@ def build_banner(corpus_element,saves_element,loaded):
|
||||
html.Label("From"),
|
||||
daq.NumericInput(
|
||||
id="year-from",
|
||||
style={"color":"#e85e56"},
|
||||
className="year-selector",
|
||||
min=min(year_list),
|
||||
max=max(year_list),
|
||||
@@ -102,6 +104,7 @@ def build_banner(corpus_element,saves_element,loaded):
|
||||
),html.Label("To"),
|
||||
daq.NumericInput(
|
||||
id="year-to",
|
||||
style={"color":"#e85e56"},
|
||||
className="year-selector",
|
||||
min=min(year_list),
|
||||
max=max(year_list),
|
||||
@@ -534,16 +537,43 @@ def build_tabs():
|
||||
],
|
||||
)
|
||||
|
||||
def build_tab_1():
|
||||
# --- build "sentences" tab ---
|
||||
# search, and show ranking
|
||||
|
||||
def build_tab_1(corpus_name):
|
||||
corpus_ind_dir=corpus[corpus_name]
|
||||
display_fileds = preprocess_corpus.get_table_fieldnames(corpus_ind_dir)
|
||||
display_fileds.append("score")
|
||||
opt=[{'label': f.upper(), 'value': f} for f in display_fileds]
|
||||
|
||||
return [
|
||||
dcc.Store(id='memory-output'),
|
||||
dcc.Loading(color="rgba(240, 218, 209,0.8)",type="cube",style={"padding-top":"25%"},children=[
|
||||
html.Div(style={"margin-left": "30px","margin-top": "1.5%", "font-variant": "all-small-caps","color": "#e85e56"},
|
||||
children="QUERY"),
|
||||
html.Div(id="current_query",style={"margin-left": "50px", "color": "#f4e9dc","width": "42%"},
|
||||
children=""),
|
||||
html.Div(style={"display":"flex"},children=[
|
||||
html.Div(style={"width": "430px"},
|
||||
children=[html.Div(style={"margin-left": "30px", "font-variant": "all-small-caps","color": "#e85e56"},
|
||||
children="DISPLAYED COLUMNS"),
|
||||
dcc.Dropdown(id="r_table_paras",className="table-dropdown",style={"margin-left":"10%"},
|
||||
options=opt,value=display_fileds,multi=True)]
|
||||
),
|
||||
html.Div(children=[html.Div(style={"font-variant": "all-small-caps","color": "#e85e56"},
|
||||
children="SENTS PER PAGE"),
|
||||
daq.NumericInput(id="n_pp",style={"margin-top": "4px","margin-left": "10%","color":"#f4e9dc","width": "40%"},
|
||||
className="year-selector sent-pp",min=10,max=1000,value=16)]
|
||||
)
|
||||
]),
|
||||
|
||||
|
||||
html.Div(
|
||||
id="rel_sent",style={"margin-left": "20px", "margin-top": "2%","font-variant": "all-small-caps"},className="side-by-side",
|
||||
id="rel_sent",style={"position": "absolute","right": "1.5%","top": "90px","font-variant": "all-small-caps"},className="side-by-side",
|
||||
children=[""]
|
||||
),
|
||||
html.Div(
|
||||
id="ranking-table",
|
||||
id="ranking-table",style={"margin-top":"1%"},
|
||||
className="output-datatable"
|
||||
),
|
||||
]),
|
||||
@@ -554,106 +584,151 @@ def build_tab_1():
|
||||
)
|
||||
]
|
||||
|
||||
# --- build "sentences" tab ---
|
||||
# search, and show ranking
|
||||
|
||||
# Ids of the inputs that only change how an already fetched result is displayed.
_LAYOUT_ONLY_INPUTS = frozenset(("r_table_paras", "n_pp"))


def _normalise_query_terms(added, base_term):
    """Return a new list of query terms with the base term added and phrases expanded.

    Terms containing "_" are treated as phrases: the underscores are replaced
    by spaces and the phrase is moved to the end of the list.  The input list
    is copied, never mutated, so no term is skipped while converting.
    """
    terms = list(added or [])
    terms.append(base_term)
    plain_terms = [term for term in terms if "_" not in term]
    phrases = [term.replace("_", " ") for term in terms if "_" in term]
    return plain_terms + phrases


def _ranking_columns(result, cols):
    """Build the DataTable column specs for the rows in ``result``.

    "id" and "sentence" always come first; every other row key is included,
    in first-seen order, only when it is listed in ``cols``.  The "document"
    key is never offered as a column.
    """
    columns_d = [{"id": "id", "name": "id"}, {"id": "sentence", "name": "sentence"}]
    selected = set(cols or [])
    seen = {"id", "sentence", "document"}
    for row in result:
        for key in row:
            if key in seen:
                continue
            seen.add(key)
            if key in selected:
                columns_d.append({"id": key, "name": key})
    return columns_d


def _number_card(card_class, relevant_no, relevant_label, total_no, total_label):
    """Build one "relevant | total" counter card for the sentences tab."""
    return html.Div(className=card_class, children=[
        html.Div(className="number-back", children=[
            html.Div(className="number-dis", children=[relevant_no]),
            html.Div(className="number-label", children=[html.Div(relevant_label)]),
        ]),
        html.Div("|", className="saperator"),
        html.Div(className="number-back", children=[
            html.Div(className="number-dis2", children=[total_no]),
            html.Div(className="number-label2", children=[html.Div(total_label)]),
        ]),
    ])


@app.callback([Output("ranking-table", "children"), Output('memory-output', 'data'),
               Output("rel_sent", "children"), Output("current_query", "children")],
              [Input("base-term", 'value'), Input("corpus-select-dropdown", "value"),
               Input("added-terms-global", "children"), Input("year-from", "value"), Input("year-to", "value"),
               Input("r_table_paras", 'value'), Input("n_pp", "value")],
              [State("base-term", 'value'),
               State('memory-output', 'data'), State("rel_sent", "children"), State("current_query", "children")]
              )
def show_ranking(base_term,corpus_name,added,y_from,y_to,cols,npp,t, mo_result,rs,cq):
    """Search the selected corpus and render the ranking table of the sentences tab.

    Parameters
    ----------
    base_term, t : value of the "base-term" input (as Input and as State).
    corpus_name : key into the module-level ``corpus`` mapping.
    added : list of additional query terms; "_" inside a term marks a phrase.
    y_from, y_to : year range passed to the search.
    cols : column ids selected in the "r_table_paras" dropdown.
    npp : page size for the table (sentences per page).
    mo_result : content of the 'memory-output' store, ``[result, tooltip]``
        after a successful search.
    rs, cq : current children of "rel_sent" and "current_query"; returned
        unchanged when only the layout is refreshed.

    Returns
    -------
    list of four items: table (or message) component, data for the store,
    the counter cards, and the displayed query string.
    """
    if not corpus_name:
        return [html.Div("Start by selecting a corpus"), "", "", ""]
    corpus_ind = corpus[corpus_name]

    if not base_term:
        return [html.Div("Please type in the base term in the left pane"), "", "", ""]

    # A cleared numeric input yields None; treat it like an invalid range
    # instead of failing on the comparison.
    if y_from is None or y_to is None or y_from > y_to:
        return [html.Div("Please correct the year range."), "", "", ""]

    terms = _normalise_query_terms(added, t)

    # Look at every triggering input, not only the first one: the cached
    # result may only be reused when nothing search-relevant changed.
    triggered_ids = {item["prop_id"].split(".")[0]
                     for item in dash.callback_context.triggered}
    layout_only = bool(triggered_ids) and triggered_ids <= _LAYOUT_ONLY_INPUTS

    # No need to search the index again when only the layout changed and a
    # cached result exists; without a cache we fall through to a fresh search.
    if layout_only and mo_result:
        result, tooltip = mo_result[0], mo_result[1]
        if result:
            return [build_ranking_table(result, _ranking_columns(result, cols), tooltip, npp),
                    mo_result, rs, cq]
        return [html.Div("No result"), mo_result, rs, cq]

    result, tooltip, rel_sent_no, sent_no, rel_article_no, article_no = \
        preprocess_corpus.search_corpus(corpus_ind, terms, y_from, y_to)
    query_text = " | ".join(terms)

    # Check for an empty result before touching any row key.
    if not result:
        return [html.Div("No result"), "", "", query_text]

    rel_sent_div = [
        _number_card("number-card-1", rel_sent_no, "RELEVANT SENTENCES",
                     sent_no, "TOTAL SENTENCES"),
        _number_card("number-card-2", rel_article_no, "RELEVANT DOCUMENTS",
                     article_no, "TOTAL DOCUMENTS"),
    ]
    return [build_ranking_table(result, _ranking_columns(result, cols), tooltip, npp),
            [result, tooltip], rel_sent_div, query_text]
|
||||
|
||||
def build_ranking_table(result,columns_d,tooltip,npp):
    """Create the DataTable that lists the ranked sentences.

    Parameters
    ----------
    result : rows handed to the table as ``data`` and to ``data_bars`` for
        the 'score' column styling.
    columns_d : column definitions passed through as ``columns``.
    tooltip : per-row tooltip entries passed through as ``tooltip_data``.
    npp : number of rows per page (``page_size``).

    Returns
    -------
    dash_table.DataTable with id "ranking_table".
    """
    header_style = {"fontWeight": "bold", "color": "inherit","border-bottom":"1px dashed"}

    # Width limits for the known text columns plus zebra striping.
    per_column_styles = [
        {"if": {"column_id": "sentence"}, 'width': '500px',"maxWidth":'500px'},
        {"if": {"column_id": "title"}, "padding-left":"15px","maxWidth":'300px'},
        {'if': {'column_id': 'author'},'maxWidth': '150px'},
        {'if': {'row_index': 'odd'},"backgroundColor":"#49494966"},
    ]

    base_cell_style = {
        "backgroundColor": "transparent",
        "fontFamily": "Open Sans",
        "padding": "0 0.2rem",
        "color": "#f4e9dc",
        "border": "none",
        'overflow': 'hidden',
        'textOverflow': 'ellipsis',
        'width': '55px',
        'minWidth': '55px',
        'maxWidth': '200px',
        "padding-left":"10px",
        "textAlign": "left",
    }

    css_rules = [
        {"selector": "table", "rule": "--accent: #e85e56;"},
        {"selector": "tr:hover td", "rule": "color: #e85e56 !important; background-color:transparent !important; cursor:pointer;height:10px;"},
        {"selector": "td:hover", "rule": "border-bottom: dashed 0px !important;"},
        {"selector": ".dash-spreadsheet-container table",
         "rule": '--text-color: #e85e56 !important'},
        {"selector":".previous-next-container","rule":"float: left;"},
        {"selector": "tr", "rule": "background-color: transparent;"},
        {"selector": ".current-page", "rule": "background-color: transparent;"},
        {"selector":".current-page::placeholder","rule":"color:#e85e56;"},
        {"selector": ".column-header--sort","rule":"color: #e85e56; padding-right:3px;"},
    ]

    # Borderless active cell ('active' | 'selected'), followed by the score bars.
    active_cell_style = [{"if": {"state": "active"}, "border": "0px solid"}]
    data_styles = active_cell_style + data_bars(result, 'score')

    return dash_table.DataTable(
        id="ranking_table",
        sort_action='native',
        sort_mode='multi',
        filter_action="native",
        style_header=header_style,
        style_as_list_view=True,
        fill_width=True,
        page_size=npp,
        style_cell_conditional=per_column_styles,
        style_cell=base_cell_style,
        css=css_rules,
        style_data_conditional=data_styles,
        data=result,
        columns=columns_d,
        tooltip_data=tooltip,
        tooltip_delay=1000,  # 1s
        tooltip_duration=None,
        selected_rows=[],
    )
|
||||
|
||||
def data_bars(df, column):
|
||||
Scores=[]
|
||||
@@ -702,13 +777,11 @@ def data_bars(df, column):
|
||||
[State('memory-output', 'data')])
|
||||
def update_graphs(active_cell,data):
|
||||
if active_cell:
|
||||
for i in data:
|
||||
for i in data[0]:
|
||||
if i['id'] == active_cell['row_id']:
|
||||
return generate_modal(i['Document'])
|
||||
return generate_modal(i['document'])
|
||||
|
||||
# === Even with no else will give error, but it will close the pop-up automatically ===
|
||||
# else:
|
||||
# return [[]]
|
||||
raise PreventUpdate
|
||||
|
||||
# term selection in the sentence pop up window
|
||||
@app.callback(
|
||||
@@ -821,6 +894,7 @@ def generate_modal(text=""):
|
||||
State("added-terms-global", "children")]
|
||||
)
|
||||
def add_from_pop(n_clicks,values,added_value_global):
|
||||
|
||||
empty_list=[]
|
||||
new_add=[]
|
||||
|
||||
@@ -845,7 +919,10 @@ def add_from_pop(n_clicks,values,added_value_global):
|
||||
if not phrase in added_value_global:
|
||||
new_add.append(phrase)
|
||||
|
||||
return [empty_list,new_add]
|
||||
if n_clicks>0:
|
||||
return [empty_list,new_add]
|
||||
else:
|
||||
raise PreventUpdate
|
||||
|
||||
|
||||
# function for checking sentence frequency
|
||||
@@ -1552,11 +1629,11 @@ def build_big_graph(group_sf_dict,doc_num_year,year_from,year_to):
|
||||
),
|
||||
"autosize":False,
|
||||
"colorway": color_used,#px.colors.qualitative.Prism,
|
||||
"margin":dict(l=15,r=15,b=10,t=40,pad=4),
|
||||
"margin":dict(l=55,r=15,b=40,t=40,pad=4),
|
||||
"legend":dict(font=dict(color="#f4e9dc")),
|
||||
"template":"plotly_dark",
|
||||
"width":1100,
|
||||
"height":290,
|
||||
"height":330,
|
||||
"legend":dict(yanchor="top",y=0.99,xanchor="left",x=0.01),
|
||||
},
|
||||
}
|
||||
@@ -1930,7 +2007,7 @@ def update_corpus(data,options):
|
||||
)
|
||||
def render_tab_content(tab_switch, corpus_name):
|
||||
if tab_switch == "tab1":
|
||||
return [build_tab_1()]
|
||||
return [build_tab_1(corpus_name)]
|
||||
if tab_switch == "tab22":
|
||||
return [build_tab_group()]
|
||||
if tab_switch == "tab3":
|
||||
|
||||
@@ -27,8 +27,6 @@ label{
|
||||
#big-app-container {
|
||||
width: 100%;
|
||||
display: flex;
|
||||
/*flex-direction: column;*/
|
||||
/*align-items: center;*/
|
||||
padding: 0 4rem;
|
||||
}
|
||||
|
||||
@@ -471,7 +469,7 @@ lable {
|
||||
}
|
||||
|
||||
#Overview-tab.custom-tab,
|
||||
#Specs-tab.custom-tab, #Graph-tab.custom-tab,
|
||||
#Specs-tab.custom-tab, /*#Graph-tab.custom-tab,*/
|
||||
#Group-tab.custom-tab, #Control-chart-tab.custom-tab{
|
||||
margin-right: 3px;
|
||||
}
|
||||
@@ -866,7 +864,7 @@ lable {
|
||||
|
||||
.output-datatable {
|
||||
margin: 1.3rem 0rem 0rem 3rem;
|
||||
line-height:2.4;
|
||||
line-height:1.5;
|
||||
align-self: flex-end;
|
||||
width: 94%;
|
||||
font-variant: all-petite-caps;
|
||||
@@ -881,12 +879,13 @@ lable {
|
||||
|
||||
.react-numeric-input, .react-numeric-input input {
|
||||
width: 100% !important;
|
||||
height: 80%;
|
||||
}
|
||||
|
||||
.react-numeric-input input {
|
||||
background-color: #f4e9dc !important;
|
||||
background-color: transparent !important;
|
||||
border-color: #e85e56 !important;
|
||||
color: #e85e56 !important;
|
||||
color: inherit !important;
|
||||
}
|
||||
|
||||
.react-numeric-input b:nth-child(2) i {
|
||||
@@ -897,6 +896,18 @@ lable {
|
||||
border-color: #e85e56 transparent transparent !important;
|
||||
}
|
||||
|
||||
.sent-pp div .react-numeric-input input {
|
||||
border-color: #404040 !important;
|
||||
font-size: small !important;
|
||||
}
|
||||
|
||||
.sent-pp div .react-numeric-input b:nth-child(2) i {
|
||||
border-color: transparent transparent #404040 !important;
|
||||
}
|
||||
|
||||
.sent-pp div .react-numeric-input b:nth-child(3) i {
|
||||
border-color: #404040 transparent transparent !important;
|
||||
}
|
||||
/* Input
|
||||
----------------------------------------------*/
|
||||
|
||||
@@ -1056,11 +1067,10 @@ input[type="checkbox"]:checked{
|
||||
}
|
||||
.side-side-side2{
|
||||
display: block;
|
||||
height: 10%;
|
||||
min-height: 145px;
|
||||
height: 25vh;
|
||||
width: 90%;
|
||||
margin-top: 10px;
|
||||
margin-bottom: 10px;
|
||||
margin-bottom: 16px;
|
||||
border: solid #e85e56;
|
||||
border-width: 1px;
|
||||
border-radius: 4px;
|
||||
@@ -1126,34 +1136,91 @@ div#added-terms::-webkit-scrollbar-thumb {
|
||||
background-color: #e85e56;
|
||||
border-radius:10px;
|
||||
}
|
||||
.dash-tooltip {
|
||||
position: fixed !important;
|
||||
left:40% !important;
|
||||
top:50% !important;
|
||||
border: solid 2px #e85e56 !important;
|
||||
border-radius: 8px;
|
||||
background-color: rgba(31,29,36,0.95) !important;
|
||||
}
|
||||
.dash-tooltip:before{
|
||||
border: none !important;
|
||||
background-color: transparent !important;
|
||||
}
|
||||
.dash-tooltip:after{
|
||||
margin-bottom: 2px;
|
||||
border: dashed 0px transparent !important;
|
||||
border-bottom-color: rgba(232, 94, 86,0.8) !important;
|
||||
background-color: transparent !important;
|
||||
}
|
||||
|
||||
|
||||
.dash-table-tooltip{
|
||||
max-width: 600px !important;
|
||||
background-color: transparent !important;
|
||||
border: none !important;
|
||||
border-radius: 5px;
|
||||
color:#e85e56;
|
||||
line-height: 25px;
|
||||
font-size: large;
|
||||
}
|
||||
|
||||
div#ranking-table::-webkit-scrollbar {
|
||||
width:3px;
|
||||
height: 5px;
|
||||
width:15px;
|
||||
height: 15px;
|
||||
background:transparent;
|
||||
margin-left: 3px;
|
||||
}
|
||||
div#ranking-table::-webkit-scrollbar-track {
|
||||
background:none;
|
||||
}
|
||||
|
||||
div#ranking-table:hover ::-webkit-scrollbar-track {
|
||||
background:#f4e9dc;
|
||||
border: 1px solid #1e1c24;
|
||||
background:transparent;
|
||||
border: 0px solid #1e1c24;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
div#ranking-table:-webkit-scrollbar-thumb {
|
||||
background: none;
|
||||
div#ranking-table::-webkit-scrollbar-corner {
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
div#ranking-table:hover::-webkit-scrollbar-thumb {
|
||||
div#ranking-table::-webkit-scrollbar-thumb {
|
||||
border: 5px solid #1e1c24;
|
||||
background-color: #e85e56;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
|
||||
|
||||
div#make-groups{
|
||||
overflow-y: scroll;
|
||||
max-height: 25vh;
|
||||
padding-bottom: 15vh;
|
||||
}
|
||||
div#make-groups::-webkit-scrollbar {
|
||||
width:5px;
|
||||
height: 0px;
|
||||
background:transparent;
|
||||
margin-left: 3px;
|
||||
}
|
||||
div#make-groups::-webkit-scrollbar-track {
|
||||
background:transparent;
|
||||
border: 0px solid #1e1c24;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
|
||||
div#make-groups::-webkit-scrollbar-thumb {
|
||||
border: 0px solid #1e1c24;
|
||||
background-color: #e85e56;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#ranking_table{
|
||||
padding-right: 10px;
|
||||
}
|
||||
|
||||
|
||||
div#display-cards::-webkit-scrollbar {
|
||||
width:3px;
|
||||
height: 5px;
|
||||
@@ -1420,14 +1487,66 @@ div#search-top-result:hover::-webkit-scrollbar-thumb {
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
.table-dropdown > .Select-control > span {
|
||||
|
||||
background: transparent !important;
|
||||
border-color: #e85e56 !important;
|
||||
}
|
||||
.table-dropdown div {
|
||||
background-color: #1f1d24 !important;
|
||||
border-color: #404040 !important;
|
||||
}
|
||||
|
||||
.table-dropdown > .Select-control > .Select-multi-value-wrapper > .Select-value {
|
||||
background-color: #494949;
|
||||
border-color: #494949;
|
||||
color: #f4e9dc;
|
||||
|
||||
border-radius: 5px;
|
||||
}
|
||||
.table-dropdown > .Select-control > .Select-multi-value-wrapper > .Select-value > .Select-value-label {
|
||||
color: #f4e9dc;
|
||||
font-size: x-small;
|
||||
}
|
||||
|
||||
.table-dropdown > .Select-control > .Select-multi-value-wrapper > .Select-value > .Select-value-icon {
|
||||
border-right: 0px dashed #e85e56;
|
||||
}
|
||||
|
||||
.table-dropdown > .Select-control > .Select-multi-value-wrapper > .Select-value > .Select-value-icon:hover {
|
||||
color: #e85e56;
|
||||
box-shadow: none;
|
||||
}
|
||||
|
||||
.table-dropdown{
|
||||
width: 155%;
|
||||
}
|
||||
|
||||
.table-dropdown > .Select-control {
|
||||
margin-left: 0% !important;
|
||||
}
|
||||
|
||||
.table-dropdown > .Select-menu-outer div {
|
||||
font-size: small !important;
|
||||
}
|
||||
|
||||
|
||||
.dash-table-container{
|
||||
width:100%;
|
||||
}
|
||||
#display-cards{
|
||||
overflow-x: scroll;
|
||||
display: flex;
|
||||
margin-left: 2px;
|
||||
margin-bottom: 10px;
|
||||
}
|
||||
#ranking-table{
|
||||
overflow-x: scroll;
|
||||
overflow: scroll;
|
||||
display: flex;
|
||||
height: 74vh;
|
||||
}
|
||||
#display-group-graph{
|
||||
overflow-x: scroll;
|
||||
@@ -1451,16 +1570,16 @@ div#search-top-result:hover::-webkit-scrollbar-thumb {
|
||||
|
||||
.Select-menu-outer ::-webkit-scrollbar {
|
||||
width:6px;
|
||||
background:#f4e9dc;
|
||||
background:#1e1c24;
|
||||
}
|
||||
.Select-menu-outer ::-webkit-scrollbar-thumb {
|
||||
border: 1px solid #e85e56;
|
||||
border: 0px solid #e85e56;
|
||||
background-color: #e85e56;
|
||||
border-radius:10px;
|
||||
}
|
||||
.Select-menu-outer::-webkit-scrollbar-track {
|
||||
background:#f4e9dc;
|
||||
border: 1px solid #f4e9dc;
|
||||
.Select-menu-outer ::-webkit-scrollbar-track {
|
||||
background:#1e1c24;
|
||||
border: 0px solid #f4e9dc;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
@@ -1610,10 +1729,9 @@ div#search-top-result:hover::-webkit-scrollbar-thumb {
|
||||
#app-content {
|
||||
background: inherit;
|
||||
padding: 0;
|
||||
width: 101%;
|
||||
/*max-width: 95%;*/
|
||||
height: 97%;
|
||||
overflow-y: scroll;
|
||||
/*overflow-y: scroll;*/
|
||||
}
|
||||
|
||||
#settings-menu {
|
||||
@@ -1656,7 +1774,8 @@ div#search-top-result:hover::-webkit-scrollbar-thumb {
|
||||
}
|
||||
|
||||
.figure-side-by-side{
|
||||
|
||||
margin-top: 25px;
|
||||
margin-bottom: 10px;
|
||||
|
||||
}
|
||||
.cmSQpo{
|
||||
|
||||
BIN
corpus_save
BIN
corpus_save
Binary file not shown.
BIN
topic_model/.DS_Store
vendored
Normal file
BIN
topic_model/.DS_Store
vendored
Normal file
Binary file not shown.
@@ -54,7 +54,7 @@ def build_model(df_full,corpus_name,content_col):
|
||||
|
||||
# NLTK Stop words
|
||||
from nltk.corpus import stopwords
|
||||
# nltk.download('stopwords')
|
||||
nltk.download('stopwords')
|
||||
stop_words = stopwords.words('english')
|
||||
stop_words.extend(['from', 'subject', 're', 'edu', 'use'])
|
||||
|
||||
|
||||
BIN
whoosh_search/.DS_Store
vendored
Normal file
BIN
whoosh_search/.DS_Store
vendored
Normal file
Binary file not shown.
@@ -116,6 +116,7 @@ def delete_corpus_from_app(index_dir):
|
||||
def add_new_corpus_from_app(index_dir,corpus_dict,id_col,text_col,title_col,year_col,author_col,add_cols):
|
||||
|
||||
doc_no_year={}
|
||||
sent_no_year={}
|
||||
doc_len_dict={}
|
||||
|
||||
path = os.path.join("./whoosh_search", index_dir)
|
||||
@@ -169,7 +170,12 @@ def add_new_corpus_from_app(index_dir,corpus_dict,id_col,text_col,title_col,year
|
||||
doc_len_dict[doc_len]+=1
|
||||
else:
|
||||
doc_len_dict[doc_len]=1
|
||||
|
||||
|
||||
if year in sent_no_year:
|
||||
sent_no_year[year]+=sen_no
|
||||
else:
|
||||
sent_no_year[year]=sen_no
|
||||
|
||||
if year in doc_no_year:
|
||||
doc_no_year[year]+=1
|
||||
else:
|
||||
@@ -192,6 +198,12 @@ def add_new_corpus_from_app(index_dir,corpus_dict,id_col,text_col,title_col,year
|
||||
f3.write(text)
|
||||
f3.close()
|
||||
|
||||
f4 = open(path+"sent_num", "w")
|
||||
for year in sent_no_year:
|
||||
text=year+" "+str(sent_no_year[year])+"\n"
|
||||
f4.write(text)
|
||||
f4.close()
|
||||
|
||||
print("[ Indexing Finished. In total "+str(line_no)+" documents. ]")
|
||||
return True
|
||||
|
||||
@@ -261,8 +273,12 @@ def filter_corpus(corpus_ind_dir, query_list,year_from, year_to):
|
||||
|
||||
|
||||
# search by query
|
||||
def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=100): #the query term in the list will be connected by OR
|
||||
def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=1000): #the query term in the list will be connected by OR
|
||||
|
||||
import time
|
||||
|
||||
start = time.time()
|
||||
|
||||
ix = index.open_dir(corpus_ind_dir) #load index
|
||||
|
||||
with ix.searcher() as searcher:
|
||||
@@ -286,11 +302,13 @@ def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=100): #the
|
||||
q2 = query.Or(term_list_Y)
|
||||
|
||||
q_f = query.And([q1,q2])
|
||||
|
||||
|
||||
|
||||
# search the index
|
||||
results = searcher.search(q_f,limit=None)
|
||||
|
||||
|
||||
result_list=[]
|
||||
full_sents =[]
|
||||
relevant_article_ids=[]
|
||||
i=0
|
||||
|
||||
@@ -304,11 +322,11 @@ def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=100): #the
|
||||
row_data = {}
|
||||
|
||||
row_data["id"] = r["id"]
|
||||
row_data["Year"] = r["year"]
|
||||
row_data["Sentence"] = r["content"].lower()#snipet
|
||||
row_data["Title"] = r["title"].lower()
|
||||
row_data["Author"] = r["author"]
|
||||
row_data["Document"] = r["content"].lower()
|
||||
row_data["year"] = r["year"]
|
||||
row_data["sentence"] = r["content"].lower()#snipet
|
||||
row_data["title"] = r["title"].lower()
|
||||
row_data["author"] = r["author"]
|
||||
row_data["document"] = r["content"].lower()
|
||||
|
||||
for key in r:
|
||||
if key in ["content", "id", "title", "year", "author"]:
|
||||
@@ -316,9 +334,12 @@ def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=100): #the
|
||||
else:
|
||||
row_data[key]=r[key]
|
||||
|
||||
row_data["Score"] = round(r.score,3)
|
||||
row_data["score"] = round(r.score,3)
|
||||
|
||||
result_list.append(row_data)
|
||||
full_sents.append({"sentence":row_data["document"]})
|
||||
else:
|
||||
break
|
||||
|
||||
with open(corpus_ind_dir+"/doc_num") as f:
|
||||
total_doc_no = 0
|
||||
@@ -332,7 +353,19 @@ def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=100): #the
|
||||
|
||||
f.close()
|
||||
|
||||
return [result_list, len(results), len(searcher.search(q2,limit=None)), len(relevant_article_ids),total_doc_no]
|
||||
with open(corpus_ind_dir+"/sent_num") as f:
|
||||
total_sent_no = 0
|
||||
lines = f.readlines()
|
||||
|
||||
for line in lines:
|
||||
sent_num=line.strip().split()
|
||||
if ((int(sent_num[0])>=year_from) & (int(sent_num[0])<=year_to)):
|
||||
total_sent_no+=int(sent_num[1])
|
||||
|
||||
f.close()
|
||||
|
||||
print("Results returned:", time.time() - start)
|
||||
return [result_list, full_sents, len(results), total_sent_no, len(relevant_article_ids),total_doc_no]
|
||||
|
||||
def check_sf(corpus_ind_dir,query_list):
|
||||
query_l=[]
|
||||
@@ -580,3 +613,9 @@ def get_fieldnames(corpus_ind_dir):
|
||||
fileds.remove('id')
|
||||
fileds.remove('title')
|
||||
return fileds
|
||||
|
||||
def get_table_fieldnames(corpus_ind_dir):
    """Return the stored field names of an index, minus 'content' and 'id'.

    The index found at ``corpus_ind_dir`` is opened and the stored names of
    its schema are read; 'content' and 'id' are then removed from that list.
    A missing name makes ``list.remove`` raise ValueError.
    """
    stored_fields = index.open_dir(corpus_ind_dir).schema.stored_names()
    for hidden_name in ('content', 'id'):
        stored_fields.remove(hidden_name)
    return stored_fields
|
||||
7
whoosh_search/sample_data_index/sent_num
Normal file
7
whoosh_search/sample_data_index/sent_num
Normal file
@@ -0,0 +1,7 @@
|
||||
2012 49
|
||||
2013 58
|
||||
2014 49
|
||||
2015 46
|
||||
2016 38
|
||||
2017 72
|
||||
2018 53
|
||||
Reference in New Issue
Block a user