mirror of
https://github.com/alicia-ziying-yang/conTEXT-explorer.git
synced 2022-02-20 22:06:42 +03:00
latest
This commit is contained in:
BIN
assets/.DS_Store
vendored
Normal file
BIN
assets/.DS_Store
vendored
Normal file
Binary file not shown.
602
assets/base-styles.css
Normal file
602
assets/base-styles.css
Normal file
@@ -0,0 +1,602 @@
|
||||
/* Table of contents
|
||||
––––––––––––––––––––––––––––––––––––––––––––––––––
|
||||
- Plotly.js
|
||||
- Grid
|
||||
- Base Styles
|
||||
- Typography
|
||||
- Links
|
||||
- Buttons
|
||||
- Forms
|
||||
- Lists
|
||||
- Code
|
||||
- Tables
|
||||
- Spacing
|
||||
- Utilities
|
||||
- Clearing
|
||||
- Media Queries
|
||||
*/
|
||||
|
||||
/* Plotly.js
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
/* plotly.js's modebar's z-index is 1001 by default
|
||||
* https://github.com/plotly/plotly.js/blob/7e4d8ab164258f6bd48be56589dacd9bdd7fded2/src/css/_modebar.scss#L5
|
||||
* In case a dropdown is above the graph, the dropdown's options
|
||||
* will be rendered below the modebar
|
||||
* Increase the select option's z-index
|
||||
*/
|
||||
|
||||
/* This was actually not quite right -
|
||||
dropdowns were overlapping each other (edited October 26)
|
||||
|
||||
.Select {
|
||||
z-index: 1002;
|
||||
}*/
|
||||
|
||||
/* Grid
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
|
||||
/* For devices larger than 400px */
|
||||
@media (min-width: 400px) {
|
||||
.container {
|
||||
width: 85%;
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* For devices larger than 550px */
|
||||
@media (min-width: 550px) {
|
||||
.container {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
max-width: 960px;
|
||||
margin: 0 auto;
|
||||
padding: 0 20px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.column,
|
||||
.columns {
|
||||
float: left;
|
||||
margin-left: 4%;
|
||||
}
|
||||
|
||||
.column:first-child,
|
||||
.columns:first-child {
|
||||
margin-left: 0%;
|
||||
}
|
||||
|
||||
.one.column,
|
||||
.one.columns {
|
||||
width: 4.66666666667%;
|
||||
}
|
||||
|
||||
.two.columns {
|
||||
width: 13.3333333333%;
|
||||
}
|
||||
|
||||
.three.columns {
|
||||
width: 22%;
|
||||
}
|
||||
|
||||
.four.columns {
|
||||
width: 30.6666666667%;
|
||||
}
|
||||
|
||||
.five.columns {
|
||||
width: 39.3333333333%;
|
||||
}
|
||||
|
||||
.six.columns {
|
||||
width: 48%;
|
||||
}
|
||||
|
||||
.seven.columns {
|
||||
width: 56.6666666667%;
|
||||
}
|
||||
|
||||
.eight.columns {
|
||||
width: 65.3333333333%;
|
||||
}
|
||||
|
||||
.nine.columns {
|
||||
width: 74.0%;
|
||||
}
|
||||
|
||||
.ten.columns {
|
||||
width: 82.6666666667%;
|
||||
}
|
||||
|
||||
.eleven.columns {
|
||||
width: 91.3333333333%;
|
||||
}
|
||||
|
||||
.twelve.columns {
|
||||
width: 100%;
|
||||
margin-left: 0;
|
||||
}
|
||||
|
||||
.one-third.column {
|
||||
width: 30.6666666667%;
|
||||
}
|
||||
|
||||
.two-thirds.column {
|
||||
width: 65.3333333333%;
|
||||
}
|
||||
|
||||
.one-half.column {
|
||||
width: 48%;
|
||||
}
|
||||
|
||||
/* Offsets */
|
||||
.offset-by-one.column,
|
||||
.offset-by-one.columns {
|
||||
margin-left: 8.66666666667%;
|
||||
}
|
||||
|
||||
.offset-by-two.column,
|
||||
.offset-by-two.columns {
|
||||
margin-left: 17.3333333333%;
|
||||
}
|
||||
|
||||
.offset-by-three.column,
|
||||
.offset-by-three.columns {
|
||||
margin-left: 26%;
|
||||
}
|
||||
|
||||
.offset-by-four.column,
|
||||
.offset-by-four.columns {
|
||||
margin-left: 34.6666666667%;
|
||||
}
|
||||
|
||||
.offset-by-five.column,
|
||||
.offset-by-five.columns {
|
||||
margin-left: 43.3333333333%;
|
||||
}
|
||||
|
||||
.offset-by-six.column,
|
||||
.offset-by-six.columns {
|
||||
margin-left: 52%;
|
||||
}
|
||||
|
||||
.offset-by-seven.column,
|
||||
.offset-by-seven.columns {
|
||||
margin-left: 60.6666666667%;
|
||||
}
|
||||
|
||||
.offset-by-eight.column,
|
||||
.offset-by-eight.columns {
|
||||
margin-left: 69.3333333333%;
|
||||
}
|
||||
|
||||
.offset-by-nine.column,
|
||||
.offset-by-nine.columns {
|
||||
margin-left: 78.0%;
|
||||
}
|
||||
|
||||
.offset-by-ten.column,
|
||||
.offset-by-ten.columns {
|
||||
margin-left: 86.6666666667%;
|
||||
}
|
||||
|
||||
.offset-by-eleven.column,
|
||||
.offset-by-eleven.columns {
|
||||
margin-left: 95.3333333333%;
|
||||
}
|
||||
|
||||
.offset-by-one-third.column,
|
||||
.offset-by-one-third.columns {
|
||||
margin-left: 34.6666666667%;
|
||||
}
|
||||
|
||||
.offset-by-two-thirds.column,
|
||||
.offset-by-two-thirds.columns {
|
||||
margin-left: 69.3333333333%;
|
||||
}
|
||||
|
||||
.offset-by-one-half.column,
|
||||
.offset-by-one-half.columns {
|
||||
margin-left: 52%;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Base Styles
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
/* NOTE
|
||||
html is set to 62.5% so that all the REM measurements throughout Skeleton
|
||||
are based on 10px sizing. So basically 1.5rem = 15px :) */
|
||||
html {
|
||||
font-size: 62.5%;
|
||||
}
|
||||
|
||||
body {
|
||||
font-size: 1.5em; /* currently ems cause chrome bug misinterpreting rems on body element */
|
||||
line-height: 1.6;
|
||||
font-weight: 400;
|
||||
font-family: "Open Sans", "HelveticaNeue", "Helvetica Neue", Helvetica, Arial, sans-serif;
|
||||
color: rgb(50, 50, 50);
|
||||
}
|
||||
|
||||
|
||||
/* Typography
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
margin-top: 0;
|
||||
margin-bottom: 0;
|
||||
font-weight: 300;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 4.5rem;
|
||||
line-height: 1.2;
|
||||
letter-spacing: -.1rem;
|
||||
margin-bottom: 2rem;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 3.6rem;
|
||||
line-height: 1.25;
|
||||
letter-spacing: -.1rem;
|
||||
margin-bottom: 1.8rem;
|
||||
margin-top: 1.8rem;
|
||||
}
|
||||
|
||||
h3 {
|
||||
font-size: 3.0rem;
|
||||
line-height: 1.3;
|
||||
letter-spacing: -.1rem;
|
||||
margin-bottom: 1.5rem;
|
||||
margin-top: 1.5rem;
|
||||
}
|
||||
|
||||
h4 {
|
||||
font-size: 2.6rem;
|
||||
line-height: 1.35;
|
||||
letter-spacing: -.08rem;
|
||||
margin-bottom: 1.2rem;
|
||||
margin-top: 1.2rem;
|
||||
}
|
||||
|
||||
h5 {
|
||||
font-size: 2.2rem;
|
||||
line-height: 1.5;
|
||||
letter-spacing: -.05rem;
|
||||
margin-bottom: 0.6rem;
|
||||
margin-top: 0.6rem;
|
||||
}
|
||||
|
||||
h6 {
|
||||
font-size: 2.0rem;
|
||||
line-height: 1.6;
|
||||
letter-spacing: 0;
|
||||
margin-bottom: 0.75rem;
|
||||
margin-top: 0.75rem;
|
||||
}
|
||||
|
||||
p {
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
|
||||
/* Blockquotes
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
blockquote {
|
||||
border-left: 4px lightgrey solid;
|
||||
padding-left: 1rem;
|
||||
margin-top: 2rem;
|
||||
margin-bottom: 2rem;
|
||||
margin-left: 0rem;
|
||||
}
|
||||
|
||||
|
||||
/* Links
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
a {
|
||||
color: #1EAEDB;
|
||||
text-decoration: underline;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
a:hover {
|
||||
color: #0FA0CE;
|
||||
}
|
||||
|
||||
|
||||
/* Buttons
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
.button,
|
||||
button,
|
||||
input[type="submit"],
|
||||
input[type="reset"],
|
||||
input[type="button"] {
|
||||
display: inline-block;
|
||||
height: 38px;
|
||||
padding: 0 30px;
|
||||
color: #555;
|
||||
text-align: center;
|
||||
font-size: 11px;
|
||||
font-weight: 600;
|
||||
line-height: 38px;
|
||||
letter-spacing: .1rem;
|
||||
text-transform: uppercase;
|
||||
text-decoration: none;
|
||||
white-space: nowrap;
|
||||
background-color: transparent;
|
||||
border-radius: 4px;
|
||||
border: 1px solid #bbb;
|
||||
cursor: pointer;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.button:hover,
|
||||
button:hover,
|
||||
input[type="submit"]:hover,
|
||||
input[type="reset"]:hover,
|
||||
input[type="button"]:hover,
|
||||
.button:focus,
|
||||
button:focus,
|
||||
input[type="submit"]:focus,
|
||||
input[type="reset"]:focus,
|
||||
input[type="button"]:focus {
|
||||
color: #333;
|
||||
border-color: #888;
|
||||
outline: 0;
|
||||
}
|
||||
|
||||
.button.button-primary,
|
||||
button.button-primary,
|
||||
input[type="submit"].button-primary,
|
||||
input[type="reset"].button-primary,
|
||||
input[type="button"].button-primary {
|
||||
color: #FFF;
|
||||
background-color: #33C3F0;
|
||||
border-color: #33C3F0;
|
||||
}
|
||||
|
||||
.button.button-primary:hover,
|
||||
button.button-primary:hover,
|
||||
input[type="submit"].button-primary:hover,
|
||||
input[type="reset"].button-primary:hover,
|
||||
input[type="button"].button-primary:hover,
|
||||
.button.button-primary:focus,
|
||||
button.button-primary:focus,
|
||||
input[type="submit"].button-primary:focus,
|
||||
input[type="reset"].button-primary:focus,
|
||||
input[type="button"].button-primary:focus {
|
||||
color: #FFF;
|
||||
background-color: #1EAEDB;
|
||||
border-color: #1EAEDB;
|
||||
}
|
||||
|
||||
|
||||
/* Forms
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
input[type="email"],
|
||||
input[type="number"],
|
||||
input[type="search"],
|
||||
input[type="text"],
|
||||
input[type="tel"],
|
||||
input[type="url"],
|
||||
input[type="password"],
|
||||
textarea,
|
||||
select {
|
||||
height: 38px;
|
||||
padding: 6px 10px; /* The 6px vertically centers text on FF, ignored by Webkit */
|
||||
background-color: #fff;
|
||||
border: 1px solid #D1D1D1;
|
||||
border-radius: 4px;
|
||||
box-shadow: none;
|
||||
box-sizing: border-box;
|
||||
font-family: inherit;
|
||||
font-size: inherit; /*https://stackoverflow.com/questions/6080413/why-doesnt-input-inherit-the-font-from-body*/
|
||||
}
|
||||
|
||||
/* Removes awkward default styles on some inputs for iOS */
|
||||
input[type="email"],
|
||||
input[type="number"],
|
||||
input[type="search"],
|
||||
input[type="text"],
|
||||
input[type="tel"],
|
||||
input[type="url"],
|
||||
input[type="password"],
|
||||
textarea {
|
||||
-webkit-appearance: none;
|
||||
-moz-appearance: none;
|
||||
appearance: none;
|
||||
}
|
||||
|
||||
textarea {
|
||||
min-height: 65px;
|
||||
padding-top: 6px;
|
||||
padding-bottom: 6px;
|
||||
}
|
||||
|
||||
/* Focus highlight for text-like inputs and selects.
   This rule appears to have been disabled on purpose, so it stays commented
   out -- but the WHOLE rule must be commented, selectors included. Previously
   only the `select:focus { ... }` tail was commented, which left the `:focus`
   selectors below dangling with a trailing comma; they were then parsed as
   part of the `label, legend` selector list, so every focused input/textarea
   received `display: block` and jumped onto its own line.
   To re-enable the highlight, uncomment the block as a unit.

input[type="email"]:focus,
input[type="number"]:focus,
input[type="search"]:focus,
input[type="text"]:focus,
input[type="tel"]:focus,
input[type="url"]:focus,
input[type="password"]:focus,
textarea:focus,
select:focus {
  border: 1px solid #33C3F0;
  outline: 0;
}
*/

/* Labels and legends render as block-level elements with no bottom margin. */
label,
legend {
  display: block;
  margin-bottom: 0px;
}
|
||||
|
||||
fieldset {
|
||||
padding: 0;
|
||||
border-width: 0;
|
||||
}
|
||||
|
||||
input[type="checkbox"],
|
||||
input[type="radio"] {
|
||||
display: inline;
|
||||
}
|
||||
|
||||
label > .label-body {
|
||||
display: inline-block;
|
||||
margin-left: .5rem;
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
|
||||
/* Lists
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
ul {
|
||||
list-style: circle inside;
|
||||
}
|
||||
|
||||
ol {
|
||||
list-style: decimal inside;
|
||||
}
|
||||
|
||||
ol, ul {
|
||||
padding-left: 0;
|
||||
margin-top: 0;
|
||||
}
|
||||
|
||||
ul ul,
|
||||
ul ol,
|
||||
ol ol,
|
||||
ol ul {
|
||||
margin: 1.5rem 0 1.5rem 3rem;
|
||||
font-size: 90%;
|
||||
}
|
||||
|
||||
li {
|
||||
margin-top: 3px;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/* Tables
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
|
||||
th,
|
||||
td {
|
||||
padding: 12px 15px;
|
||||
text-align: left;
|
||||
border-bottom: 1px solid #E1E1E1;
|
||||
}
|
||||
|
||||
th:first-child,
|
||||
td:first-child {
|
||||
padding-left: 0;
|
||||
}
|
||||
|
||||
th:last-child,
|
||||
td:last-child {
|
||||
padding-right: 0;
|
||||
}
|
||||
|
||||
|
||||
/* Spacing
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
button,
|
||||
.button {
|
||||
margin-bottom: 0rem;
|
||||
}
|
||||
|
||||
input,
|
||||
textarea,
|
||||
select,
|
||||
fieldset {
|
||||
margin-bottom: 0rem;
|
||||
}
|
||||
|
||||
pre,
|
||||
dl,
|
||||
figure,
|
||||
table,
|
||||
form {
|
||||
margin-bottom: 0rem;
|
||||
}
|
||||
|
||||
p,
|
||||
ul,
|
||||
ol {
|
||||
margin-bottom: 0.75rem;
|
||||
}
|
||||
|
||||
/* Utilities
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
.u-full-width {
|
||||
width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.u-max-full-width {
|
||||
max-width: 100%;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.u-pull-right {
|
||||
float: right;
|
||||
}
|
||||
|
||||
.u-pull-left {
|
||||
float: left;
|
||||
}
|
||||
|
||||
|
||||
/* Misc
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
hr {
|
||||
margin-top: 3rem;
|
||||
margin-bottom: 3.5rem;
|
||||
border-width: 0;
|
||||
border-top: 1px solid #E1E1E1;
|
||||
}
|
||||
|
||||
|
||||
/* Clearing
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
|
||||
/* Self Clearing Goodness */
|
||||
.container:after,
|
||||
.row:after,
|
||||
.u-cf {
|
||||
content: "";
|
||||
display: table;
|
||||
clear: both;
|
||||
}
|
||||
|
||||
|
||||
/* Media Queries
|
||||
–––––––––––––––––––––––––––––––––––––––––––––––––– */
|
||||
/*
|
||||
Note: The best way to structure the use of media queries is to create the queries
|
||||
near the relevant code. For example, if you wanted to change the styles for buttons
|
||||
on small devices, paste the mobile query code up in the buttons section and style it
|
||||
there.
|
||||
*/
|
||||
|
||||
|
||||
/* Larger than mobile */
|
||||
@media (min-width: 400px) {
|
||||
}
|
||||
|
||||
/* Larger than phablet (also point when grid becomes active) */
|
||||
@media (min-width: 550px) {
|
||||
}
|
||||
|
||||
/* Larger than tablet */
|
||||
@media (min-width: 750px) {
|
||||
}
|
||||
|
||||
/* Larger than desktop */
|
||||
@media (min-width: 1000px) {
|
||||
}
|
||||
|
||||
/* Larger than Desktop HD */
|
||||
@media (min-width: 1200px) {
|
||||
}
|
||||
2018
assets/custom-styles.css
Normal file
2018
assets/custom-styles.css
Normal file
File diff suppressed because it is too large
Load Diff
BIN
assets/dash-logo-new.png
Normal file
BIN
assets/dash-logo-new.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 4.4 KiB |
57
assets/fonts.css
Normal file
57
assets/fonts.css
Normal file
@@ -0,0 +1,57 @@
|
||||
/* cyrillic-ext */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFWJ0bf8pkAp6a.woff2) format('woff2');
|
||||
unicode-range: U+0460-052F, U+1C80-1C88, U+20B4, U+2DE0-2DFF, U+A640-A69F, U+FE2E-FE2F;
|
||||
}
|
||||
/* cyrillic */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFUZ0bf8pkAp6a.woff2) format('woff2');
|
||||
unicode-range: U+0400-045F, U+0490-0491, U+04B0-04B1, U+2116;
|
||||
}
|
||||
/* greek-ext */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFWZ0bf8pkAp6a.woff2) format('woff2');
|
||||
unicode-range: U+1F00-1FFF;
|
||||
}
|
||||
/* greek */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFVp0bf8pkAp6a.woff2) format('woff2');
|
||||
unicode-range: U+0370-03FF;
|
||||
}
|
||||
/* vietnamese */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFWp0bf8pkAp6a.woff2) format('woff2');
|
||||
unicode-range: U+0102-0103, U+0110-0111, U+1EA0-1EF9, U+20AB;
|
||||
}
|
||||
/* latin-ext */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFW50bf8pkAp6a.woff2) format('woff2');
|
||||
unicode-range: U+0100-024F, U+0259, U+1E00-1EFF, U+2020, U+20A0-20AB, U+20AD-20CF, U+2113, U+2C60-2C7F, U+A720-A7FF;
|
||||
}
|
||||
/* latin */
|
||||
@font-face {
|
||||
font-family: 'Open Sans';
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: local('Open Sans Regular'), local('OpenSans-Regular'), url(https://fonts.gstatic.com/s/opensans/v15/mem8YaGs126MiZpBA-UFVZ0bf8pkAg.woff2) format('woff2');
|
||||
unicode-range: U+0000-00FF, U+0131, U+0152-0153, U+02BB-02BC, U+02C6, U+02DA, U+02DC, U+2000-206F, U+2074, U+20AC, U+2122, U+2191, U+2193, U+2212, U+2215, U+FEFF, U+FFFD;
|
||||
}
|
||||
|
||||
BIN
assets/plotly_logo.png
Normal file
BIN
assets/plotly_logo.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 6.7 KiB |
BIN
corpus_save
Normal file
BIN
corpus_save
Normal file
Binary file not shown.
BIN
sample_data/.DS_Store
vendored
Normal file
BIN
sample_data/.DS_Store
vendored
Normal file
Binary file not shown.
41
sample_data/sample_data.csv
Normal file
41
sample_data/sample_data.csv
Normal file
@@ -0,0 +1,41 @@
|
||||
ID,Year,Person,Gender,Party,State,Text
|
||||
1,2012,"Cormann, Sen Mathias",1,LP,WA,"Collecting statistical information is core business for the ABS, and the government has full confidence in their capacity to deliver this opportunity for Australians to have their say. However, we need to be assured that the statistical information obtained has not been subject to influences such as bribery and threats or misleading information about how to complete the survey form, and ensure that those that engage in such influencing behaviour are held to account. To address this, the bill will put in place a range of measures to ensure the integrity of the statistics that are collected as part of the process. The bill includes penalties for receiving and giving bribes or making threats to influence or affect people's decisions on this matter, including whether to respond. It also includes a civil penalty for printing, publishing and distributing matters or things that are likely to mislead or deceive a person in how they respond to the survey, for example, how the survey response is marked."
|
||||
2,2012,"Cormann, Sen Mathias",1,LP,WA,"The bill also includes offences for officers who engage in conduct with the intention of influencing the content of a response provided to the statistician. Again, that is consistent with what would apply in the context of an election period. The government believes that the Australian people are able to have this debate respectfully and courteously. We also believe that Australians will judge anyone harshly, on either side of the debate, who pursues inappropriate and offensive arguments. We certainly call on all Australians to participate in this debate with courtesy and respect. However, the government acknowledges that we cannot guarantee that all Australians will at all times express their opinions on that basis. For this reason, the bill will also establish an offence for grievous conduct against those participating in the debate, or against those who may hold strong views on the survey question. The bill contains provisions against vilification, intimidation and threat to cause harm, as well as for hindering or interfering with a person in making a response, or discriminating against a person for making a donation relating to the marriage law survey."
|
||||
3,2012,"Cormann, Sen Mathias",1,LP,WA,"Importantly, and I stress this point, merely expressing a view about the marriage law survey question does not trigger the offence provisions against vilification, intimidation or the threat of harm. The conduct would have to be vilification, intimidation or threat to cause harm. While the government would like nothing more than for these provisions never to be used, their inclusion gives the parliament the opportunity to send a clear message that hateful and malicious conduct will not be tolerated. I remind the parliament that the government's preference was to deliver on the commitment to give the Australian people a say on whether or not the law should be changed to allow same-sex couples to marry through a compulsory attendance plebiscite. Such an approach would have brought with it many existing safeguards and protections. As we proceed with the survey as the new mechanism to give the Australian people their say, we need to separately provide those safeguards to ensure all Australians have the opportunity to participate in this process in the right environment."
|
||||
4,2012,"Wong, Sen Penny",0,ALP,SA,"I rise to speak on the Marriage Law Survey (Additional Safeguards) Bill 2017. This bill cannot cure a flawed process. It cannot stop all the hurt, all the prejudice that is being expressed, all the lack of acceptance that is being communicated to LGBTI persons and to same-sex-couple families. But it provides limited protections, and on that basis the opposition will be supporting it. I want to acknowledge Senator Cormann's work with stakeholders, the opposition and others, the crossbench, to reach agreement on the matters contained in this bill. I also recognise the efforts of Mr Dreyfus and Ms Butler from the Labor Party. Labor regrets that this bill is necessary. It wouldn't have been necessary if the Prime Minister had shown some leadership on this issue—if he had been prepared to grant a free vote and have this parliament do its job. We all remember the origins of the plebiscite, which Senator Cormann has extolled the virtues of. In August 2015 there was a very long party room meeting to discuss a public vote on marriage equality, and what did we get?"
|
||||
5,2012,"Wong, Sen Penny",0,ALP,SA,"This policy of a plebiscite was dreamt up—another obstacle, another hurdle, another delay; an obstacle designed by hardline opponents, Mr Abbott and Senator Abetz, to make it more difficult for equality to be achieved. A plebiscite is a pretty cruel and cynical tactic. It is a mechanism designed by those who will never agree with equality in this country on this issue. It's a policy that had its origins in the dying days of the Abbott prime ministership but it was taken on by Mr Turnbull almost as an article of faith. That, I think, has been to his detriment and to the great disappointment of many people in this country. I thank the Senate for its rejection of the plebiscite on no fewer than two occasions. It would have been good if that had led the government to recognise that it was time for parliament to do its job—to do what millions of Australians wanted, which was to get a vote done on an issue that has, regrettably, been unresolved for too many years. When this chamber did the right thing, did what the Australian people sought—to vote against a political tactic—the government turned to a non-legislative method, which is, of course, the flawed postal survey that we are now confronted with. We are having a $122 million survey because the Prime Minister doesn't want a vote in the parliament. There are very few times when you get a number on someone's weakness but we have one: $122 million. It is a waste of time and a waste of money. As I have said, we oppose this survey, but those of us who support marriage equality on all sides have to campaign to win it. We are where we are and we have to stand up for our values."
|
||||
6,2012,"Gallagher, Sen Katy",0,ALP,ACT,"In the very short time before the dinner break, I'm going to make a start on my speech. I welcome the opportunity to speak on the Marriage Amendment (Definition and Religious Freedoms) Bill 2017. As many have already commented in this place, 15 November, just a couple of weeks ago now, was certainly a historic day. For me, it was the day that the Australian people told politicians—as they have in various polls over the past few years—to get on and legislate for marriage equality. As many others have also said in this debate so far, I'm one of those who didn't believe the survey was needed and that the $100 million—or however much it turns out to be—spent on holding it could have been used for so many more worthy pursuits. The harm that has been done, particularly to the LGBTIQ community and their families, will, for many, take a long time to overcome. The Australian community voted overwhelmingly. "
|
||||
7,2013,"Gallagher, Sen Katy",0,ALP,ACT,"Before the dinner break I was saying the Australian community has voted overwhelmingly to remove discrimination from marriage and to support equality, and now it is time for the Australian parliament to act and reflect that will of the Australian people in the bill that we are debating today. I'm not going to speak for too long, but I wanted to concentrate my remarks on the history of the equality campaign here in the ACT that I've now been involved in for more than 15 years. I think it is somewhat appropriate that we are debating this legislation here in Canberra which, whilst it is the nation's capital, the seat of government and the place where politics happen, is also a very strong and supportive community and town which has fought very hard to ensure that there is equality across the statute book and that all members of our community are treated equally."
|
||||
8,2013,"Seselja, Sen Zed",1,LP,ACT,"We are concerned that if changes are made to the Sex Discrimination Act 1984 without creating adequate positive protections for our schools—that's the important point—the effect would be to profoundly compromise the ability of our schools to act in accordance with their beliefs and convictions. ... no changes should be made to legislation affecting our schools, until there is also adequate protection in place for schools to maintain their beliefs and character as faith-based schools. Christian schools need legislation that gives us much clearer assurance that schools can continue to: Teach in a manner that is consistent with the religious beliefs of the school; Manage student behaviour according to the expectations and beliefs of the school community. In further testimony, the Australian Association of Christian Schools said: if they are removed and adequate protection isn't given to schools to hold a commonly held biblical view of sexuality and relationships in what is taught and in managing school life and in who the school employs, you'll be carving out an area of faith and deeming it impermissible. You'll be deciding that those long-held beliefs of many Christians can't be expressed in education at all—and that's a serious step for a government to take. They are concerned that, without adequate protection, schools could be forced to teach in ways that contradict what they genuinely believe, to act against their conscience and beliefs in the way they handle behaviour, and to employ staff who don't share in and meaningfully uphold the beliefs of the school. "
|
||||
9,2013,"Seselja, Sen Zed",1,LP,ACT,"In my contribution on Thursday, I made clear some of the concerns in the community about an unamended version of this bill and its implications. To quote Senator Keneally, in her contribution: We also know that the overwhelming majority of religious schools do not want or see the need for these exemptions. We know this as a result of the recent Senate inquiry, where Catholic and other religious school systems gave evidence that these exemptions are not used or relied upon. I went through Christian Schools Australia—65,000 students plus the 15,000 from Adventist schools—and the Australian Association of Christian Schools—110 or so schools with 45,000 students. I also talked about the statements from members of the Australian Catholic Bishops Conference, who educate around 766,000 students in this country, and a number of others. Even if we were to accept, despite the fact that those numbers tell a very, very different story, that there are no significant concerns or that it's only a minority, this goes to the very point about what religious freedom is and what freedom of religious belief is. It's not about whether there's a majority who have a particular view; it's about whether or not there is a right to express that view."
|
||||
10,2013,"Kitching, Sen Kimberley",0,ALP,VIC,"It follows from this that such schools have a right to require that both students and teachers act in a way which is broadly consistent with the faith and practices of the religious denomination. If they are to be denied that right, the school cannot serve the purpose for which it was established. However, there is also a majority view in Australia that Australians should not be discriminated against on the grounds of sexuality or gender identity. In relation to schools, this is the view particularly in relation to discrimination against students. It is the need to reconcile these two broadly accepted propositions and the difficulty in doing so that has been brought into sharp focus in the wake of the marriage equality survey and subsequent legislation and the subsequent Ruddock report, which again I say we are yet to see. The Sex Discrimination Act currently provides certain exemptions to religious schools. The act makes it unlawful to discriminate in, amongst other things, education against people who have certain protected attributes, as the act prescribes. These attributes include sex, sexual orientation, gender identity, intersex status, marital or relationship status, pregnancy or potential pregnancy and breastfeeding. The bill before us would require that religious schools conform to this provision of the act in relation to students. I point out, however, that while sexual orientation, gender identity, intersex status and marital or relationship status are protected attributes under the act, political advocacy and activism are not. There has also been discussion around subsections (1) and (2) of section 38 of the act, and I think there is a need to reconcile and balance different rights. I believe the balancing of rights is desirable. In those circumstances, I believe an appropriately drafted, positively expressed right to religious freedom should be legislated. 
I want to thank the Senate Legal and Constitutional Affairs Committee. The committee worked together very well and productively and the secretariat worked—dare I say it—a miracle to produce the report in a very short time. Labor has decided to act, and it is on that basis that I'm happy to commend Senator Wong's bill to the Senate. "
|
||||
11,2013,"Boyce, Sen Sue",0,LP,QLD,"These aren't appeals to sympathy; they're arguments that same-sex marriage would be good for all of us--and for conservative reasons to boot. Those reasons are the same reasons that those who support marriage as the union between a man and a woman espouse for their marriages. A marriage is a special commitment. A marriage provides more security for those in it, including the children who would be in it. There is no reason not to allow same-sex marriage in Australia. I believe that this bill will assist us in moving towards that. If we are to vote on this bill, I will be supporting it. "
|
||||
12,2013,"Ludlam, Sen Scott",1,AG,WA,"I rise to support the Marriage Act Amendment (Recognition of Foreign Marriages for Same-Sex Couples) Bill 2013, which my colleague Senator Hanson-Young has brought before the chamber as part of a long and somewhat arduous campaign in favour of something that we in the Australian Greens believe is completely obvious. I want to acknowledge and congratulate Senator Boyce for the contribution that she just made. Those sorts of contributions are all too rare in this place. When people are considering matters and voting on their conscience, the quality of the debate is measurably improved. So thank you, Senator Boyce, for having the courage to do what you have just done. The only qualification I will make is that, in the Australian Greens, we do have a conscience vote on all matters. You have to go back quite some time in the Hansard record to find that being exercised, but we do vote with our conscience on all of these matters. I think it is part of the reason for our success. "
|
||||
13,2013,"Griff, Sen Stirling",1,CA,SA,"This bill represents a significant change to the Sex Discrimination Act and, as such, should not be rushed through this week. There is absolutely no urgency for this bill to be dealt with this week, as particularly religious schools generally don't rely on these provisions in practice. We support the intent of what Labor is trying to do with this bill for the simple reason that we do not support discrimination against students. We support the intent of the Greens to ensure teachers are not discriminated against and we also support the intent of the government to ensure religious freedoms are protected. Our view overall is that this bill should have been sent off for proper inquiry and scrutiny. This would provide all with the necessary time to properly consider all implications and input from stakeholders. As the bill now stands, our party considers the topic one for a conscience vote, but we all came to a united approach on this—and that is that we will support this bill because its intentions align with ours, but we cannot at this stage support any additional amendments. We have been forced into this position because we have not had a real opportunity to consider the implications and unintended consequences of all the amendments. "
|
||||
14,2014,"Griff, Sen Stirling",1,CA,SA,"On these amendments we have been hearing, particularly in the last two hours, a very diverse range of viewsmany of them conflictingwhich tells us that even those that seem somewhat straightforward may potentially open a Pandora's box of problems. For instance, we support what the government is seeking to do with ensuring teachers can teach in accordance with their faith. But this may simply create an avenue for discriminatory treatment of students to creep in. We are also unsure that we need to bolt on additional protections to the existing reasonableness test that exists at 7B in the act. Maybe it's appropriate or maybe it is unnecessary. We simply haven't had the time to fully consider the implications, so we will abstain on this and the other government amendments. We know there is bipartisan support in this place and the other place for amending the Sex Discrimination Act so that schools cannot discriminate against LGBTQI students and teachers. It is unfortunate that there is no consensus about how to proceed. Labor's bill amends section 37 of the Sex Discrimination Act and repeals section 38(3). In effect, this bill proposes to remove existing protections that currently allow religious schools to discriminate against lesbian, gay, bisexual, transgender, queer and intersex students if it's done in line with their religious doctrine or if it is to avoid injury to the religious susceptibilities of adherents of that religion. We appreciate that this bill makes religious schools very nervous. Removing familiar protections will inevitably have that effect. We accept this might mean religious schools have to rethink their approach to some students in some instances. But, in practice, this should not stop religious schools from teaching their faith. But we also understand that these provisions are not heavily relied on by schools as it is. 
My office has also been told by Christian school stakeholders that they fear some students might be tempted to make mischief and abuse the weakened protections. I'm not entirely sure how or why a student would do so, especially if they want to be at that school. But that kind of fearmongering misses the point. We need to legislate to protect the rights of the many, not to quash the rights of the many in order to protect against extreme scenarios. This bill may well end up not being legislated. We may yet see other attempts to amend the Sex Discrimination Act. Whatever we are faced with, in this parliament or the next, Centre Alliance will approach it with a view to ensuring equality of treatment for all students, regardless of their sexual orientation or gender identity. This is ultimately about keeping our society moving on a path towards equality and acceptance of all people."
|
||||
15,2014,"Wong, Sen Penny",0,ALP,SA,"I have spoken on the Second Reading; I just want to briefly speak to the Second Reading amendment moved by Senator Collins, which is on sheet 8606. So I rise to speak on that amendment. Today, as I've previously said, we do have an opportunity to do what this parliament too often fails to do; to come together on an issue on which we all agree and change the country for the better. The bill does one thing, and one thing only; that is, to ensure that every Australian child, no matter their gender or sexual orientation, is treated equally. It will make this country a more equal place, nothing more. It won't prevent schools from requiring students to attend chapel, it won't prevent schools from requiring uniforms and it won't prevent—let me be very clear—the teaching of religious education. To put that issue beyond doubt, Senator Collins has moved the Second Reading amendment on sheet 8606 to that effect. I support that amendment. "
|
||||
16,2014,"Hanson-Young, Sen Sarah",0,AG,SA,"I rise today to speak in favour of the Marriage Equality Amendment Bill 2010. I also point out to the senators in the chamber and, of course, to those who may be listening to this debate that this also includes amendments that have been circulated since this bill went through a very thorough Senate inquiry. The report of that inquiry determined, in order to put at ease some of the concerns in relation to religious freedom, that the bill be strengthened in that area. That would ensure that for those who want to make sure churches and religious groups can continue to marry those whom they feel they would best like to are able to do so without being impinged upon by any of the changes that this bill would inflict on the Marriage Act. The ability has always been there for religious organisations to determine who they marry and who they do not. We have churches making that decision on a daily basis. The circulated amendments suggest that there is no doubt that that is the case under this bill. "
|
||||
17,2014,"Hanson-Young, Sen Sarah",0,AG,SA,"The main purpose of this marriage equality bill is to remove the current discrimination in the Marriage Act so as to clearly allow for two people, regardless of their gender, regardless of their sexuality, the legal right to marry. This is a campaign for true equality within the Australian law. It is something that has been fought long and hard, not just here in Australia but also around the world. We know that country after country continues to take up this very, very important cause, putting truth behind the belief for equality for all. If this parliament were to agree to this bill and to pass it into law, we could take advantage of the strong opportunity that we know exists for same-sex couples in the Australian community who desperately want the right to marry under law. The parliament could take the opportunity by both hands to make a reform that the majority of Australians believe in. The majority of Australians now agree that marriage equality's time has come. Poll after poll has proven that there has been a shift in the mindset of the Australian community to accept that true equality must include the amendments to the Marriage Act to allow equality to reign in love. That is what this bill proposes to do."
|
||||
18,2015,"Hanson-Young, Sen Sarah",0,AG,SA,"There is huge support for marriage equality in Australia, and the parliament that gets it right, the parliament that can take hold of this need and desire for and belief in equality, will be a parliament that the Australian public congratulates wholeheartedly. We know that support for this reform crosses all boundaries cities, suburbs, regional areas, the bush, as well as the various political parties. The majority of coalition voters support marriage equality. The majority of Christian Australians support marriage equality. We have had representatives from various religious organisations and churches walk the halls of this place asking us to give all Australians a fair go by ensuring that the institution of marriage can be strengthened for evermore and that the strength that marriage gives families can be recognised by this parliament. The importance of allowing two people's relationship to be understood and defined by the universal language of marriage and love is something that MPs in this place should grab hold of; they should stand tall and accept that we are doing a good thing. The majority of Australians believe that this is the right thing to do, and more and more people agree that this change should happen. The fight for marriage equality is not going to be won and lost in this place; it has already been won out there in the Australian community. It is now up to this place to recognise that, unlike some issues, where we are leading the way, in this one, unfortunately, we are following. But we can turn that around and accept that if Cupid does not discriminate neither should the law. Love is love and equality does matter. "
|
||||
19,2015,"Brandis, Sen George",1,LP,QLD,"Let me very briefly state to the Senate the coalition's position on the Marriage Equality Amendment Bill 2010. The coalition made an undertaking to the Australian people at the 2010 election that we would support the existing definition of marriage and, having made that undertaking to the Australian people, we are not going to act at variance to it. The Labor Party has changed its position, because Julia Gillard gave a similar undertaking to the Australian people at the 2010 election but subsequently facilitated arrangements within the Labor Party to allow that undertaking to be vacated. When we in the coalition give an undertaking to the public we stick by it, whether it be on the carbon tax, private health insurance, or any issue, and this is one such issue. After listening to Senator Sarah Hanson-Young's speech I am bound to say that one would have thought there was only one available view. Senator Hanson-Young, I have to tell you that yours is not the only view. Much as those who advocate your view do so, I am sure, in good faith, you will not win this argument by seeking to silence alternative views. People are entitled to have their own views about marriage, as you have yours. People who have a more conservative view than yours about marriage are as much within their rights as you are."
|
||||
20,2015,"Brandis, Sen George",1,LP,QLD,"To me, your bill is a bill about marriage but it is not a bill about equality. Equality for same-sex people was won in this parliament, in this Senate, with the support of all parties, including mine, by the amendments that were made to a suite of Commonwealth statutes in 2008. I said at the time that it had been too long in coming. I had myself, within the Howard government, been agitating for that for years, as many of my colleagues had been. But after those bills were passed with the support of all parties there was no Commonwealth law which treated same-sex people in relationships any differently than opposite-sex people in a relationship. But your bill is not about equality, Senator Hanson-Young, even though you claim that it is; it is about marriage. Marriage is an institution defined by custom, religion and law, or at least by some of those things. For you to have discovered that an institution which has been understood to mean one particular thing for the entire history of humanity is, all of a sudden, a fundamentally unjust institution, as it is understood according to its traditional conception, is an extraordinary impertinence. When you use the phrase, 'Let us put behind us the dark pages of history,' do you really think, Senator Sarah Hanson-Young, that history began with you? Do you seriously think that the human conscience began with you? Or do you not allow for the fact that, just as you have your views, which I am sure are held in good faith, other people have their views which are also held in good faith. Those views reflect the entire understanding and the entire course of human history of what a marriage is, and you should, if I may say so, pay a little more respect to those who do not agree with you."
|
||||
21,2015,"Urquhart, Sen Anne",0,ALP,TAS,"I stand in this place as one of many strong supporters for marriage equality in the Labor Party. Marriage equality for many in our party fits with our values of fairness, equality, family and compassion, as well as reason, logic and progress. Values of fairness include fairness to all in our community, fairness for all those in our society who are doing it tough for many reasons, fairness to those who have a disability, fairness to those who are less fortunate, and fairness to have the opportunity to succeed regardless of circumstance. Values of equality include equality to all those in our society regardless of gender, religion, race, sexual orientation and age; and equality for those in our society about how they live their lives and the opportunities they should receive. Values of family stand whatever the make-up of that family is. Families today are very different to what history would define a family to be. Regardless of how a family is made up, the support from a family unit is now as important to individuals and to the community as ever."
|
||||
22,2015,"Hanson-Young, Sen Sarah",0,AG,SA,"The Marriage Equality Amendment Bill 2013 seeks to amend the Marriage Act 1961 to provide equality for same-sex couples. The Bill removes the existing discrimination in the federal Marriage Act that confines marriage to between a man and a woman. It redefines marriage as being between two people regardless of their sex, sexual orientation or gender identity. The Australian Greens share the view of the majority of Australians that the time for marriage equality has come. The call for marriage equality has huge community momentum and it is growing day by day. The most recent national survey found that 62% of Australians believe same-sex couples should be able to marry, and 75% of Australians believe federal reform is inevitable."
|
||||
23,2016,"Hanson-Young, Sen Sarah",0,AG,SA," This is the fourth time I have introduced a bill for marriage equality. The Senate conducted in-depth inquires into two of my bills which highlighted the significant community interest in marriage equality. In 2009, the inquiry into my bill received more than 25,000 submissions. The senate inquiry into my second bill in 2012 set a new record of 75,000 submissions, the majority of which were supportive of the bill. It is clear there is enormous community passion for this. The inquiry into my bill recommended that, with a few small amendments, the bill should pass. Sadly, in 2012 we saw votes in both the House of Representatives and in the Senate on government marriage equality bills that were carefully orchestrated to fail. The Leader of the Opposition chose to buck the traditional approach of his party by refusing Coalition members a conscience vote. Meanwhile the then Prime Minister failed to have her members support the ALP's own policy of supporting marriage equality by insisting on a conscience vote."
|
||||
24,2016,"Hanson-Young, Sen Sarah",0,AG,SA," In Australia, the states and territories are continuing to show the way, with Greens and non-Greens bills for state marriage equality across the nation. States such as Tasmania, South Australia and New South Wales have shown cross-party cooperation and are closer to achieving marriage equality than ever before. Since I introduced my first marriage equality bill, many state Parliaments have acted to deliver human rights for Gay Lesbian Bisexual Transgender and Intersex Australians. While no substitute for marriage, Tasmania has legislated for civil unions and New South Wales has legislated for same-sex parenting rights. These are important and historic reforms and the efforts of MPs and advocates in our states and territories are to be admired and acknowledged. But it is federal marriage equality that the community wants most of all. That is why today I stand to introduce this Bill, to assure gay and lesbian Australians that there are people in this place who believe all relationships should be recognised as equal under the law."
|
||||
25,2016,"Hanson-Young, Sen Sarah",0,AG,SA,"Marriage provides couples, families and the general community with a universal language for love, commitment and relationships. It is also one of the universal legal and social institutions through which we find connection and belonging, not only with our partner, but with our families and communities. It is time for Australia to join comparable countries like the United Kingdom, France and our neighbours, New Zealand, and legislate for marriage equality. I know that there are good people in this place that support equality for all Australians, and that is why today I am calling on those members to co-sponsor my Bill. It is now time for parliamentarians from across political divides to work together and deliver on federal marriage equality. It is only through the combined efforts that we can really achieve equality for all. Senator HANSON-YOUNG: I seek leave to make a very short statement. The PRESIDENT: Leave is granted for one minute, Senator Hanson-Young."
|
||||
26,2016,"Hanson-Young, Sen Sarah",0,AG,SA,"At the exact same time that I have introduced this bill, we have just heard reports that the ACT marriage laws have been overturned by the High Court. This means that this bill is more important than ever for people living not just in the ACT in Canberra but throughout our entire nation who want to see progression towards marriage equality. It is an important day but obviously a very sad day for people who have only just had their marriages celebrated over the weekend and the days following. There will be a lot of broken hearts here in Canberra this afternoon. I seek leave to continue my remarks later. "
|
||||
27,2016,"Leyonhjelm, Sen David",1,LDP,NSW,"However, it is also true that, now Malcolm Turnbull is Prime Minister, my original fear Tony Abbott sabotaging the plebiscite process will not come to pass. I had visions of a question so convoluted it made no sense, or something absurd enough for a Monty Python sketch perhaps from the hallowed halls of the Philosophy Department at the University of Woolloomooloo! Despite my reservations about voting on other people's rights, I do think that, if we are stuck with a plebiscite, we should hold it at the same time as the next federal election. Holding the plebiscite at the same time as the next election has two advantages. The first is that we save $160 million of taxpayers' money what the AEC advises a stand-alone plebiscite would cost. The second is there will be less of a spotlight on the issue, and we can moderate the worst excesses of the people who have taken the time to write me so many lovely, lovely letters so far! Elections are always about a range of issues same-sex marriage would be just one among many. I commend the bill to the Senate. "
|
||||
28,2017,"Dastyari, Sen Sam",1,ALP,QLD,"I want to begin by acknowledging that a lot of other senators have done a lot of work in this space over the past few years, especially in terms of different pieces of legislation and different approaches. I want to acknowledge that, as he pointed out, Senator Leyonhjelm had introduced a bill into this chamber. Others, including Senator Wong and cross-party people like Senator Hanson-Young, have done a lot of work in this space, and I think it should be acknowledged. I commend Senator Simms for bringing this debate to us; it is an important debate for us to have. I want to reject this idea that says that we cannot have a sophisticated debate or that we should not have this debate simply because it is too divisive or it has already been said. I think this is an important part of social policy, and it is an important role of the Senate to talk about it."
|
||||
29,2017,"Dastyari, Sen Sam",1,ALP,QLD,"I do want to put my views on record. I do not believe I have ever spoken publicly about my views on marriage equality. I have been reported and recorded in different kinds of lists that get emailed around as being a supporter of marriage equality. I certainly am, but I am also a very big supporter of an acknowledgement of the fact that there are a range of very different views. There are certainly a range of different views within my political party, the Australian Labor Party. For me, the notion that we would not support marriage equality is something that I find difficult to understand. I find the arguments against it difficult to understand. I think the case for marriage equality is there. I acknowledge there are others, however, who do not necessarily share those views. When we are talking about a debate like this, a debate that needs to be focused on understanding and tolerance, it is very important that we recognise that there are going to be so many different views out there. "
|
||||
30,2017,"Dastyari, Sen Sam",1,ALP,QLD,"I will be honest: I think a lot of this is a generational challenge and issue. It is a generational issue as well. The data and the research overwhelmingly demonstrate that younger Australians tend to have a much stronger view towards supporting marriage equality than older Australians. The challenge and opportunity will be, as this happens, for us to be able to build on that. I support marriage equality. I support this bill. In doing so, I also support the Labor Party position, which is that we have to recognise and understand that not everyone shares the same view and that there are people who even though I disagree with their arguments and I disagree with their judgement have, through good faith, come to a different view. It is a fact that there is a changing debate out there in the community. The views of the community are starting to shift and have shifted dramatically on this issue. That is a positive thing and I believe and hope that, during my time in this chamber, there will be an opportunity to vote for a bill providing for marriage equality that will eventually be supported."
|
||||
31,2017,"Canavan, Sen Matthew",1,Nats,QLD,"I would like to start off where Senator Dastyari left off: I think all views should be respected in this debate, and I certainly respect colleagues, friends and family who have a different view to my own which is in support of traditional marriage. I also want to say, up-front, that I support the removal of any practical discrimination against homosexuals and same-sex couples. We should ensure that happens. I believe we have largely achieved that, over a period of time. However, if there are instances of discrimination against same-sex couples, they should be removed. Whether or not the definition of marriage in our existing legislation is discrimination itself is a matter for debate, in my view. It is a matter that we need to consider very clearly. We should consider it very clearly because the definition in our Marriage Act today has been the same for centuries for millennia. Not just in our culture and not just in the religions that have been common in this country but in almost every culture in the world, over the span of millennia, marriage has been an institution that enshrines the union of a man and a woman, often with the intention of creating children. "
|
||||
32,2017,"Rice, Sen Janet",0,AG,VIC,"This is going to continue until we vote to legalise same-sex marriage, because this is the last state sanctioned discrimination against same-sex attracted and transgendered people, and it is the most important state sanctioned discrimination that still exists. Now in 2016, after 17 bills in the parliament 17 bills since 2004 and after countless hours of debate we can take a crucial step on the path to equal love right now. We are now debating Sarah Hanson-Young's bill, but we have had enough talk. Let's bring on the vote. It is long past time. If we ring the bells now we can be hearing wedding bells in no time. We do not need extra time for debate. The Labor Party was confident that the numbers were there earlier this week, so let's get this long overdue reform through the Senate and get it through today. We need a vote this morning. Labor, it seems, want to vote to keep on talking. We want to vote to change the law. "
|
||||
33,2017,"Rice, Sen Janet",0,AG,VIC,"If Labor's position on same-sex marriage were motivated by a genuine desire to win the vote, not to sabotage Senate voting reform, then there is no reason why we cannot just vote on the bill this morning, without delay. We are ready to vote. We are ready to vote 'yes'. But we are not going to be bullied on unrelated pieces of legislation. We have got time this morning to vote and we can do it, so I am calling on Labor. Labor are saying that they are committed to equal marriage. Labor are saying they want to have a vote on equal marriage, so I am calling on Labor to allow it. Labor, we can do it this morning. There are people's lives that are dependent on this. There is Penny's and my life, there are other people in this parliament's lives, there are the lives of the people that I have talked about this morning. "
|
||||
34,2017,"Rice, Sen Janet",0,AG,VIC,"I am calling on Labor to support this bill going to a vote this morning. If Labor do not support this, maybe it is because they are not confident of getting the outcome, and they are not confident in their numbers. If Labor do not support bringing on a vote this morning then maybe it is just a cynical exploitation of an issue that means so much to so many. And if Labor does not allow a vote today then they are going to be responsible for missing an historic opportunity to vote on this issue. We are ready to bring on a vote today, and we can guarantee with a vote today that every one of our MPs will vote unanimously for marriage equality, because we have done that every vote, every MP, every time. Every time there has been opportunity to vote on marriage equality the Greens have done it 17 bills in this parliament. We will continue to vote for marriage equality because it is so important for ending discrimination against lesbian, gay, bisexual, transgender and intersex people. We want to see marriage equality. We need to see marriage equality. Now is the time. We can vote on this today, and I call on all people in this place today to be voting for marriage equality, to be voting for love, to be voting for those wedding bells to be ringing as soon as possible. "
|
||||
35,2018,"Macdonald, Sen Ian",1,LP,QLD,"In my long time in this chamber I have heard a number of speeches that were dripping with hypocrisy and insincerity, but the one I heard this morning from the Leader of the Opposition on a formal motion a procedural motion relating to the same subject just about took the cake. I want to explain to those who might be listening to the debate what the coalition's position is. It starts from the premise that, if you are a member of the Liberal and National parties, you go to an election and make a promise, and you make the promise intending that promise to be kept. This subject of same-sex marriage is one that I know raises a lot of emotions on both sides of the debate, and I understand the emotion that is engendered on both sides of the debate. But this is an issue that has been around for some time and the coalition have a policy on it. We went to the last election saying that the definition of marriage would stay the same as it is in the Marriage Act for this term of parliament, and that is the commitment we took to the Australian people. I remember that, at the time, the coalition thought long and hard about that policy. We were petitioned by the church groups, if I can loosely label them as that. They had very, very strong views on it and they made a point which resonated with the coalition as a whole. We decided to go to the last election with this commitment to retain the definition of marriage as it is in the Marriage Act."
|
||||
36,2018,"Macdonald, Sen Ian",1,LP,QLD,"I know Labor senators find it hard to believe that a political party would make a promise, intending to keep it, and then actually keep it. I know that is foreign to the Australian Labor Party. We all remember the promise by the Labor Party: 'There will be no carbon tax under a government I lead.' Having been elected on that promise, what was the first thing that an Australian Labor government did when it took the reins? It introduced a carbon tax the direct opposite of what they had promised before the election. That is not a one-off. In the last few days, I have raised a number of times the Keating Labor Party's l-a-w law tax reductions. Remember that? Some senators might have been around then, as I was. Thinking they were going to lose the next election, Mr Keating and the Labor Party actually legislated for tax cuts before the election. It was passed and Mr Keating said: 'It's l-a-w law. These tax cuts will happen. They have been legislated.' Lo and behold, unexpectedly, Mr Keating and the Labor Party won that election. What was the first thing that they did, the first legislative program that they indulged in on being returned to government? It was to renege, to cancel, to abolish that bill giving what was then called the l-a-w law tax cuts."
|
||||
37,2018,"Hanson-Young, Sen Sarah",0,AG,SA,"RECOGNITION OF FOREIGN MARRIAGES BILL 2014 This Bill amends the Marriage Act 1961 so that same-sex marriages that were validly entered into in foreign countries can be recognised in Australia. Currently international marriages that are entered into by different-sex couples are legally recognised under Australian law. However marriages entered into by same-sex couples are barred from recognition through an explicit prohibition in the Marriage Act. This Bill removes this prohibition and affords full recognition of overseas marriage to couples when they return to Australia, regardless of their gender or sexual orientation. The introduction of this Bill comes at a time when likeminded countries around the world are embracing equality. Britain recently celebrated their first same-sex marriages with couples from all corners of the world travelling great lengths to be the first to marry on British soil. Rainbow flags were hung all over the country to celebrate the occasion and people rejoiced when the first couples said ""I do""."
|
||||
38,2018,"Hanson-Young, Sen Sarah",0,AG,SA,"On the other side of the world the rainbow of equality has stretched across the Tasman Sea to join two neighbouring countries, Australia and New Zealand. Since New Zealand legislated for marriage equality last year, over 300 Australians have made the journey to have their love and commitment for one another legally recognised. Those couples will now join the many Australians who have been travelling the world over the past decade to get married, only to come back home to the country that they love to find that their marriage is not recognised. Despite being legally married in the foreign country, in their homeland they step off the plane and have to leave their marriage at the customs gate. This is not in the Australian spirit, particularly when public support for marriage equality is at an all-time high. Over 65 per cent of Australians want to see marriage equality happen. However, as we wait for others to accept the inevitable, we should at least recognise the marriages of all couples – lesbian, gay and straight – who have legally married overseas."
|
||||
39,2018,"Hanson-Young, Sen Sarah",0,AG,SA,"This Bill offers a modest and practical step forward to marriage equality and it is consistent with the foundational Australian ideal of equality before the law. The marriages that are the subject of this Bill have been entered into by the parties with sincerity and commitment and are valid marriages under the law of the country where they were solemnised. The couples have gone to the effort and emotional investment of organising a wedding in a foreign country, often at great expense and involving family and friends from Australia, and they have made vows that would be life-long if they were to remain in the country where the wedding was held. The solemnity of the vows that these couples made overseas should be recognised by Australia's Parliament and people. By recognising same-sex marriages from overseas, as we do with all other marriages, this Bill will help gay and lesbian Australians who are in loving relationships get the recognition that they deserve."
|
||||
40,2018,"Hanson-Young, Sen Sarah",0,AG,SA,"Australia will not be alone in recognising international same-sex marriages. Israel, Slovenia and Japan are just some of the countries which recognise the marriages entered into in foreign countries by same-sex couples without having domestic laws to perform same-sex marriage. Couples from those countries can marry in one of the fourteen countries which have marriage equality, such as Argentina, Britain, France or New Zealand, and then return to have their marriage recognised under the laws of their homeland. This Bill is the first step for Australia along the road to marriage equality and an important one at that. Most Australians understand that the time for marriage equality came long ago and it's the Australian Parliament's duty to catch up. I commend this Bill to the Senate. Senator HANSON-YOUNG: I seek leave to continue my remarks later. Leave granted; debate adjourned. "
|
||||
|
BIN
topic_model/.DS_Store
vendored
Normal file
BIN
topic_model/.DS_Store
vendored
Normal file
Binary file not shown.
192
topic_model/generate_models_fromapp.py
Normal file
192
topic_model/generate_models_fromapp.py
Normal file
@@ -0,0 +1,192 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# # 0. Import all the Required Packages
|
||||
import nltk
|
||||
import re
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from pprint import pprint
|
||||
from nltk import sent_tokenize
|
||||
import glob, time, gc, datetime
|
||||
|
||||
# Gensim
|
||||
import gensim
|
||||
import gensim.corpora as corpora
|
||||
from gensim.utils import simple_preprocess
|
||||
from gensim.models import CoherenceModel
|
||||
from gensim.models.phrases import Phrases, Phraser
|
||||
|
||||
# spacy for lemmatization
|
||||
import spacy
|
||||
import os
|
||||
|
||||
|
||||
# # 1. Preprocess the Documents and store the Documents in a .pkl file
|
||||
# - Input: All the .csv files
|
||||
# - Output: Processed content .pkl files
|
||||
|
||||
def build_model(df_full,corpus_name,content_col):
    """Preprocess an uploaded corpus and save the inputs used for topic modelling.

    A new folder ``./topic_model/<corpus_name>/`` is created and four artefacts
    are written into it:

    * ``selected_content_<corpus_name>.pkl`` - the raw text column (renamed
      ``body``) together with a 1-based ``ID`` column;
    * ``processed_content_<corpus_name>.pkl`` - a one-row frame whose ``ID``
      cell is the list of document IDs and whose ``body`` cell is the list of
      token lists (tokenised, stop words removed, bigrams merged, lemmatised);
    * ``content_dictionary_<corpus_name>`` - the gensim dictionary;
    * ``corpus_<corpus_name>.pkl`` - a one-row frame holding the bag-of-words
      corpus in its ``corpus`` cell.

    Args:
        df_full: records accepted by ``pandas.DataFrame.from_records``.
        corpus_name: name of the corpus; the folder of that name must not
            exist yet under ``./topic_model/``.
        content_col: name of the column in ``df_full`` that holds the text.

    Returns:
        ``True`` on success, otherwise the error message as a string (folder
        could not be created, or the input could not be read / stored).
    """
    start_time = time.time()

    try:
        df = pd.DataFrame.from_records(df_full,columns=[content_col])
        df.columns=["body"]
        df["ID"] = list(range(1, len(df)+1))

        path = os.path.join("./topic_model/", corpus_name)

        try:
            os.mkdir(path)
        except OSError as error:
            return str(error)

        file_name = '/selected_content_' + corpus_name + '.pkl'
        df.to_pickle(path+file_name)
        print("load ok")

    except Exception as error:
        # Report the failure to the caller instead of terminating the whole
        # process: this function is driven from the application, and the
        # mkdir failure above already uses "return the message" as its contract.
        print("reading failed", time.time() - start_time)
        return str(error)

    # NLTK Stop words
    from nltk.corpus import stopwords
    # nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))  # set: O(1) membership tests
    stop_words.update(['from', 'subject', 're', 'edu', 'use'])

    # Initialize spacy 'en' model, keeping only tagger component (for efficiency)
    # python3 -m spacy download en
    nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

    def doc_to_words(sentences):
        """Yield one token list per document."""
        for sentence in sentences:
            yield gensim.utils.simple_preprocess(str(sentence), deacc=True)  # deacc=True removes punctuations

    def remove_stopwords(texts):
        """Drop stop words; ``texts`` is already tokenised by ``doc_to_words``."""
        return [[word for word in doc if word not in stop_words] for doc in texts]

    def make_bigrams(texts):
        """Merge frequent word pairs using the frozen phrase model."""
        return [bigram_mod[doc] for doc in texts]

    def lemmatization(texts, allowed_postags=('NOUN', 'ADJ', 'VERB', 'ADV')):
        """https://spacy.io/api/annotation"""
        texts_out = []
        for sent in texts:
            doc = nlp(" ".join(sent))
            texts_out.append([token.lemma_ for token in doc if token.pos_ in allowed_postags])
        return texts_out

    total_time_for_one_doc = time.time()

    # Clean the text. Rows whose body is not a string are dropped, and the ID
    # list is filtered in the same pass so IDs and documents stay aligned.
    ID_list = []
    data = []
    for doc_id, sent in zip(df.ID.values.tolist(), df.body.values.tolist()):
        if not isinstance(sent, str):
            continue
        sent = sent.replace('\\n', ' ')   # literal backslash-n sequences
        sent = sent.replace('\n', ' ')    # real new line characters
        sent = sent.replace('.', '. ')    # make sure a full stop ends a token
        sent = re.sub(r' {2,}', ' ', sent)  # collapse the spaces introduced above
        ID_list.append(doc_id)
        data.append(sent)

    gc.collect()

    print("1. Converting document to words for", file_name, "...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    data = list(doc_to_words(data))
    print("Converting doc to word time:", time.time() - start)

    gc.collect()

    # Build the bigram model
    print("2. Building the bigram model for", file_name, "...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    bigram = gensim.models.Phrases(data, min_count=5, threshold=100) # higher threshold fewer phrases.
    print("Building Bigram:", time.time() - start)

    # Faster way to get a sentence clubbed as a trigram/bigram
    print("3. Building the bigram model for", file_name, "...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    bigram_mod = gensim.models.phrases.Phraser(bigram)
    print("Building Bigram Model:", time.time() - start)

    # Remove Stop Words
    print("4. Removing stop words for", file_name, "...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    data = remove_stopwords(data)
    print("Time spent on removing stopwords:", time.time() - start)

    # Form Bigrams
    print("5. Forming bigrams for", file_name, "...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    data = make_bigrams(data)
    print("Time spent on forming bigrams:", time.time() - start)

    # Do lemmatization keeping only noun, adj, vb, adv
    print("6. Lemmatizing", file_name, "...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    data = lemmatization(data, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])
    print("Time spent on lemmatizing:", time.time() - start)

    print("7. Writing into pickle...", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    # One row, two cells: the list of IDs and the list of token lists.
    processed_df = pd.DataFrame([[ID_list,data]], columns = ['ID','body'])
    pkl_file_name = "./topic_model/"+corpus_name+"/processed_content_" + corpus_name + '.pkl'
    processed_df.to_pickle(pkl_file_name)

    print("Total process time for one document", time.time() - total_time_for_one_doc, str(datetime.datetime.now()).split('.')[0])

    # # 2. Create the Dictionary from the Processed Content
    # - Input: Processed Content
    # - Output: Dictionary (gensim.Dictionary.id2word file)
    print("Start Reading:", str(datetime.datetime.now()).split('.')[0])
    start = time.time()
    # The documents are added exactly once; feeding them a second time through
    # add_documents() would double every document frequency and num_docs.
    id2word = corpora.Dictionary(data)
    print(len(id2word))
    gc.collect()

    print("Read time:", time.time() - start)

    id2word.save("./topic_model/"+corpus_name+"/content_dictionary_"+corpus_name)

    # 3. Form the Corpus with the Dictionary and Processed Content
    # - Input: Dictionary (gensim.Dictionary.id2word file) & Processed Content
    # - Output: Corpus .pkl files
    print("Start Reading:", str(datetime.datetime.now()).split('.')[0])
    total = time.time()

    corpus = [id2word.doc2bow(text) for text in data]
    print("length of data:", len(data), "; length of corpus", len(corpus))
    corpus_df = pd.DataFrame([[corpus]], columns = ['corpus'])
    print("Shape of the corpus in this iteration:", corpus_df.shape)
    save_file_name = "./topic_model/"+corpus_name+"/corpus_" + corpus_name + ".pkl"
    corpus_df.to_pickle(save_file_name)

    print("Total time:", time.time() - total)

    return True
|
||||
|
||||
|
||||
BIN
topic_model/sample_data/content_dictionary_sample_data
Normal file
BIN
topic_model/sample_data/content_dictionary_sample_data
Normal file
Binary file not shown.
BIN
topic_model/sample_data/corpus_sample_data.pkl
Normal file
BIN
topic_model/sample_data/corpus_sample_data.pkl
Normal file
Binary file not shown.
BIN
topic_model/sample_data/processed_content_sample_data.pkl
Normal file
BIN
topic_model/sample_data/processed_content_sample_data.pkl
Normal file
Binary file not shown.
BIN
topic_model/sample_data/sample_data.model
Normal file
BIN
topic_model/sample_data/sample_data.model
Normal file
Binary file not shown.
BIN
topic_model/sample_data/selected_content_sample_data.pkl
Normal file
BIN
topic_model/sample_data/selected_content_sample_data.pkl
Normal file
Binary file not shown.
59
topic_model/word2vec.py
Normal file
59
topic_model/word2vec.py
Normal file
@@ -0,0 +1,59 @@
|
||||
import gensim
|
||||
from gensim.models.phrases import Phrases, Phraser
|
||||
import pandas as pd
|
||||
from gensim.test.utils import datapath
|
||||
from gensim.models import Word2Vec
|
||||
import sys
|
||||
def train_model(corpus_name):
    """Train a Word2Vec model on a preprocessed corpus and save it.

    Reads the token lists stored in the ``body`` cell of
    ``./topic_model/<corpus_name>/processed_content_<corpus_name>.pkl``,
    merges frequent word pairs into phrases, trains the embedding and writes
    it to ``./topic_model/<corpus_name>/<corpus_name>.model``.

    Args:
        corpus_name: name of the corpus folder under ``./topic_model/``.

    Returns:
        ``True`` once the model has been saved.
    """
    corpus_dir = "./topic_model/"+corpus_name+"/"
    processed_file_name = corpus_dir+"processed_content_" + corpus_name + '.pkl'
    token_lists = pd.read_pickle(processed_file_name).body.values.tolist()[0]

    # Detect phrases on the token lists, then apply the frozen detector to them.
    phrase_detector = Phraser(Phrases(token_lists, min_count=10, progress_per=10000))
    phrased_sentences = phrase_detector[token_lists]

    training_options = {
        "size": 200,      # the size of the dense vector to represent each token
        "window": 50,     # +/- "window" number of words are counted as neighbors
        "min_count": 1,   # minimium frequency count of words
        "workers": 10,    # the number of threads to use behind the scenes
        "iter": 20,       # number of iterations over the corpus
    }
    model = gensim.models.Word2Vec(phrased_sentences, **training_options)

    model.save(corpus_dir+corpus_name+".model")

    return True
|
||||
|
||||
def find_similar(corpus_name, term_list,top_n=50):
    """Return the terms most similar to ``term_list`` in a corpus' Word2Vec model.

    Multi-word terms are joined with underscores before the lookup, i.e.
    ``"marriage equality"`` is looked up as ``"marriage_equality"``.

    Args:
        corpus_name: corpus whose model
            ``./topic_model/<corpus_name>/<corpus_name>.model`` is loaded.
        term_list: query terms (words or space-separated phrases). The list
            is not modified.
        top_n: number of neighbours to return.

    Returns:
        The items produced by ``model.wv.most_similar`` collected in a list.
    """
    model = Word2Vec.load("./topic_model/"+corpus_name+"/"+corpus_name+".model")

    # Build a fresh list: rewriting term_list while iterating over it skipped
    # the element following every phrase and altered the caller's list.
    query_terms = []
    for term in term_list:
        words = term.split()
        if len(words) > 1:  # a phrase
            query_terms.append("_".join(words))
        else:
            query_terms.append(term)
    print(query_terms)

    top_term_list = list(model.wv.most_similar(positive=query_terms, topn=top_n))
    print(top_term_list)
    return top_term_list
|
||||
|
||||
if __name__ == "__main__":
    # Command-line entry point: the corpus name is the only required argument.
    if len(sys.argv) >= 2:
        train_model(sys.argv[1])
    else:
        print("""\n Usage: python {} corpus_name
""".format(sys.argv[0]))
        sys.exit(-1)
|
||||
|
||||
BIN
uploads/.DS_Store
vendored
Normal file
BIN
uploads/.DS_Store
vendored
Normal file
Binary file not shown.
BIN
whoosh_search/.DS_Store
vendored
Normal file
BIN
whoosh_search/.DS_Store
vendored
Normal file
Binary file not shown.
582
whoosh_search/preprocess_corpus.py
Normal file
582
whoosh_search/preprocess_corpus.py
Normal file
@@ -0,0 +1,582 @@
|
||||
|
||||
import io
|
||||
import sys
|
||||
import os
|
||||
import shutil
|
||||
from backports import csv
|
||||
|
||||
from whoosh import index
|
||||
from whoosh import fields
|
||||
from whoosh.fields import Schema, ID, TEXT, NGRAMWORDS
|
||||
from whoosh.analysis import StemmingAnalyzer
|
||||
from whoosh.analysis import StandardAnalyzer
|
||||
from whoosh.analysis import NgramWordAnalyzer
|
||||
|
||||
from whoosh.qparser import QueryParser
|
||||
from whoosh.query import spans
|
||||
from whoosh import query
|
||||
|
||||
from whoosh.reading import IndexReader
|
||||
from whoosh.matching import Matcher
|
||||
|
||||
from spacy.lang.en import English
|
||||
|
||||
import re
|
||||
|
||||
import pandas as pd
|
||||
|
||||
# Module-level English pipeline to which only a sentencizer component is
# added; the indexing functions below call nlp(text) and read doc.sents to
# split each document into sentences.
# NOTE(review): create_pipe / Defaults.create_tokenizer look like spaCy
# v2-style calls - confirm against the pinned spaCy version.
nlp = English()
sentencizer = nlp.create_pipe("sentencizer")
nlp.add_pipe(sentencizer)


# Stand-alone tokenizer built from the English defaults.
tokenizer = nlp.Defaults.create_tokenizer(nlp)
|
||||
|
||||
def get_schema():
    """Build the Whoosh schema shared by every corpus index.

    One index document is one sentence. ``content`` and ``title`` are stored,
    phrase-searchable text fields that share a ``StandardAnalyzer``; ``id`` is
    a stored unique identifier; ``year`` and ``author`` are stored text fields.
    """
    ana = StandardAnalyzer()
    field_specs = {
        "content": TEXT(stored = True, phrase=True, analyzer=ana),
        "id": ID(unique=True, stored=True),
        "title": TEXT(stored = True, phrase=True, analyzer=ana),
        "year": TEXT(stored=True),
        "author": TEXT(stored=True),
    }
    return Schema(**field_specs)
|
||||
|
||||
#index all documents of a new corpus
|
||||
def add_new_corpus(index_dir,corpus,id_col=-1,text_col=4,title_col=2,year_col=19):
    """Index every document of a CSV corpus, one index document per sentence.

    The first CSV row is treated as a header and skipped. Each remaining row
    is split into sentences with the module-level ``nlp`` pipeline, and every
    sentence is added to a new Whoosh index in ``index_dir`` under the id
    ``<articleID>_<sentence number>``. Two statistics files are written next
    to the index:

    * ``doc_num`` - first line: number of documents; then one
      ``<year> <count>`` line per year;
    * ``doc_len`` - one ``<length in words> <count>`` line per document
      length, sorted by length.

    Args:
        index_dir: existing directory for the index. The statistics files are
            created at ``index_dir + "doc_num"`` / ``index_dir + "doc_len"``,
            so the value is expected to end with a path separator.
        corpus: path of the CSV file (read as latin1).
        id_col: column index of the document id, or -1 to use the row number.
        text_col: column index of the document text.
        title_col: column index of the title.
        year_col: column index of the year.
    """
    doc_no_year={}
    doc_len_dict={}

    ix = index.create_in(index_dir, schema=get_schema())
    writer = ix.writer()

    line_no=0
    # read relevant documents to whoosh
    with io.open(corpus, encoding='latin1') as f:
        r = csv.reader(f)
        next(r, None)  # skip the header row; tolerate a completely empty file

        for line in r:
            if id_col==(-1):
                articleID=str(line_no)
            else:
                articleID=str(line[id_col])

            # Per-document fields are read before the sentence loop so that a
            # document without any sentence is still counted under its own year.
            title = u''+line[title_col]+''
            year = u''+line[year_col]+''
            doc = nlp(u''+line[text_col]+'')

            doc_len = 0
            for sen_no, s in enumerate(doc.sents):
                sent_text=str(s)
                sentID = articleID+"_"+str(sen_no)
                writer.add_document(content=u""+sent_text, id=u""+sentID, title=u""+title, year=u""+year)
                doc_len+=len(sent_text.split())

            doc_len_dict[doc_len] = doc_len_dict.get(doc_len, 0) + 1
            doc_no_year[year] = doc_no_year.get(year, 0) + 1

            line_no+=1

    writer.commit()

    with open(index_dir+"doc_num", "w") as f2:
        f2.write(str(line_no)+"\n")
        for year in doc_no_year:
            f2.write(year+" "+str(doc_no_year[year])+"\n")

    with open(index_dir+"doc_len", "w") as f3:
        for length in sorted(doc_len_dict):
            # length is an int and must be converted before concatenation
            f3.write(str(length)+" "+str(doc_len_dict[length])+"\n")

    print("[ Indexing Finished. In total "+str(line_no)+" documents. ]")
|
||||
|
||||
def delete_corpus_from_app(index_dir):
    """Delete a corpus index by recursively removing the ``index_dir`` folder.

    Errors raised by ``shutil.rmtree`` (for instance when the folder does not
    exist) are not caught here and propagate to the caller.
    """
    shutil.rmtree(index_dir)
|
||||
|
||||
def add_new_corpus_from_app(index_dir,corpus_dict,id_col,text_col,title_col,year_col,author_col,add_cols):
    """Index a corpus uploaded through the app, one index document per sentence.

    Creates ``./whoosh_search/<index_dir>`` and a ``groups/`` folder inside
    it, builds a Whoosh index there (base schema plus one stored text field
    per entry of ``add_cols``) and writes the ``doc_num`` / ``doc_len``
    statistics files next to it.

    Args:
        index_dir: folder name of the new index under ``./whoosh_search``.
            The sub-folder and statistics files are created by appending to
            the joined path, so the value is expected to end with "/".
        corpus_dict: iterable of row mappings (column name -> value).
        id_col: column holding the document id, or the literal string
            ``"use row number"`` to use the running row number instead.
        text_col: column holding the document text.
        title_col: column holding the title.
        year_col: column holding the year.
        author_col: column holding the author.
        add_cols: additional columns to store with every sentence.

    Returns:
        ``True`` on success, or the error message as a string when the
        folders could not be created.
    """
    doc_no_year={}
    doc_len_dict={}

    path = os.path.join("./whoosh_search", index_dir)
    try:
        os.mkdir(path)
        os.mkdir(path+"groups/")
    except OSError as error:
        return str(error)

    print(path + " created.")

    ix = index.create_in(path, schema=get_schema())

    for col_name in add_cols:
        ix.add_field(col_name, fields.TEXT(stored=True))

    writer = ix.writer()
    # read relevant documents to whoosh
    line_no=0

    for line in corpus_dict:

        if id_col=="use row number":
            articleID=str(line_no)
        else:
            articleID=str(line[id_col])

        # Per-document fields are read once, before the sentence loop, so a
        # document without any sentence is still counted under its own year
        # (previously `year` was unbound or left over from the previous row).
        title = u''+str(line[title_col])+''
        year = u''+str(line[year_col])+''
        author = u''+str(line[author_col])+''
        extra_fields = {}
        for col_name in add_cols:
            extra_fields[col_name]=u''+str(line[col_name])+''

        doc = nlp(u''+str(line[text_col])+'')
        doc_len = 0

        for sen_no, s in enumerate(doc.sents):
            sent_text=str(s)
            sentID = articleID+"_"+str(sen_no)
            args={"content":u""+sent_text,"id":u""+sentID,"title":u""+title,"year":u""+year,"author":u""+author}
            args.update(extra_fields)

            writer.add_document(**args)

            doc_len+=len(sent_text.split())

        doc_len_dict[doc_len] = doc_len_dict.get(doc_len, 0) + 1
        doc_no_year[year] = doc_no_year.get(year, 0) + 1

        line_no+=1

    writer.commit()

    with open(path+"doc_num", "w") as f2:
        f2.write(str(line_no)+"\n")
        for year in doc_no_year:
            f2.write(year+" "+str(doc_no_year[year])+"\n")

    with open(path+"doc_len", "w") as f3:
        for length in sorted(doc_len_dict):
            f3.write(str(length)+" "+str(doc_len_dict[length])+"\n")

    print("[ Indexing Finished. In total "+str(line_no)+" documents. ]")
    return True
|
||||
|
||||
def filter_corpus(corpus_ind_dir, query_list,year_from, year_to):
    """Export every article that matches the query within a year range as CSV.

    The index stores one entry per sentence, with ids of the form
    ``<article id>_<sentence number>``.  Articles having at least one
    sentence that matches any term of ``query_list`` (terms are OR-ed) and a
    ``year`` between ``year_from`` and ``year_to`` (inclusive) are rebuilt by
    joining all of their sentences with single spaces.

    Args:
        corpus_ind_dir: directory of the index to open.
        query_list: list of query strings; multi-word strings become phrase
            queries.  Characters other than letters, digits, ``_`` and space
            are removed first.
        year_from: first year (int) to include.
        year_to: last year (int) to include.

    Returns:
        CSV text with ``id`` as first column, a ``sentences`` column holding
        the rebuilt text, and the remaining stored fields.  When nothing
        matches, the CSV contains only the ``id`` header.
    """
    ix = index.open_dir(corpus_ind_dir)  # load index

    with ix.searcher() as searcher:
        term_queries = []
        for t in query_list:
            t = re.sub(r'[^a-zA-Z0-9_ ]', '', t)
            splitted = t.split()
            if len(splitted) > 1:
                term_queries.append(query.Phrase("content", splitted))
            else:
                # Use the stripped token so surrounding spaces do not become
                # part of the term being looked up.
                term_queries.append(query.Term("content", splitted[0] if splitted else t))

        year_queries = [query.Term("year", str(y)) for y in range(year_from, year_to + 1)]
        q_f = query.And([query.Or(term_queries), query.Or(year_queries)])

        # Only the trailing "_<sentence number>" is stripped, so article ids
        # that themselves contain "_" stay intact.
        relevant_article_ids = set()
        for r in searcher.search(q_f, limit=None):
            relevant_article_ids.add(r["id"].rsplit('_', 1)[0])

        new_corpus = []
        for r_article_id in sorted(relevant_article_ids):
            id_prefix = r_article_id + "_"
            row_data = {}
            for r in searcher.search(query.Prefix("id", id_prefix), limit=None):
                # The prefix "a_" also matches sentences of article "a_1";
                # keep only sentences that belong to exactly this article.
                if r["id"].rsplit('_', 1)[0] != r_article_id:
                    continue
                if "sentences" not in row_data:
                    # First sentence: copy the article-level fields once.
                    for key in r:
                        if key == "content":
                            row_data["sentences"] = r['content']
                        elif key == "id":
                            row_data["id"] = r_article_id
                        else:
                            row_data[key] = r[key]
                else:
                    row_data["sentences"] += " " + r['content']
            new_corpus.append(row_data)

    pd_save = pd.DataFrame.from_records(new_corpus)
    cols = ['id'] + [col for col in pd_save if col != 'id']
    # reindex (instead of pd_save[cols]) also works for an empty result set,
    # where the frame has no 'id' column yet.
    pd_save = pd_save.reindex(columns=cols)
    return pd_save.to_csv(encoding='utf-8')
|
||||
|
||||
|
||||
# search by query
|
||||
def search_corpus(corpus_ind_dir, query_list,year_from, year_to,top_n=100):
    """Search the sentence index; the terms in ``query_list`` are connected by OR.

    Args:
        corpus_ind_dir: directory of the index; it must also contain the
            ``doc_num`` file (lines of ``<year> <document count>``).
        query_list: list of query strings; multi-word strings become phrase
            queries.  Characters other than letters, digits, ``_`` and space
            are removed first.
        year_from: first year (int) to include.
        year_to: last year (int) to include.
        top_n: maximum number of hits returned in detail.

    Returns:
        A list ``[rows, n_hits, n_in_range, n_articles, n_docs]`` where
        ``rows`` holds one dict per hit for the first ``top_n`` hits,
        ``n_hits`` is the number of matching sentences, ``n_in_range`` the
        number of indexed sentences within the year range, ``n_articles`` the
        number of distinct articles among all hits and ``n_docs`` the sum of
        the ``doc_num`` counts for the years in range.
    """
    ix = index.open_dir(corpus_ind_dir)  # load index

    with ix.searcher() as searcher:
        term_queries = []
        for t in query_list:
            t = re.sub(r'[^a-zA-Z0-9_ ]', '', t)
            splitted = t.split()
            if len(splitted) > 1:
                term_queries.append(query.Phrase("content", splitted))
            else:
                # Use the stripped token so surrounding spaces do not become
                # part of the term being looked up.
                term_queries.append(query.Term("content", splitted[0] if splitted else t))

        q1 = query.Or(term_queries)
        q2 = query.Or([query.Term("year", str(y)) for y in range(year_from, year_to + 1)])
        results = searcher.search(query.And([q1, q2]), limit=None)

        reserved = ("content", "id", "title", "year", "author")
        result_list = []
        relevant_article_ids = set()

        for rank, r in enumerate(results, start=1):
            # Sentence ids are "<article id>_<sentence number>"; strip only
            # the trailing number so article ids containing "_" stay intact.
            relevant_article_ids.add(r["id"].rsplit('_', 1)[0])

            if rank > top_n:
                continue  # still counted above, but not returned in detail

            row_data = {
                "id": r["id"],
                "Year": r["year"],
                "Sentence": r["content"].lower(),
                "Title": r["title"].lower(),
                "Author": r["author"],
                "Document": r["content"].lower(),
            }
            # Copy any additional stored fields under their own names.
            for key in r:
                if key not in reserved:
                    row_data[key] = r[key]
            row_data["Score"] = round(r.score, 3)
            result_list.append(row_data)

        total_doc_no = 0
        with open(corpus_ind_dir + "/doc_num") as f:
            for line in f:
                doc_num = line.strip().split()
                # Lines with a single value (the overall total) are skipped.
                if len(doc_num) >= 2 and year_from <= int(doc_num[0]) <= year_to:
                    total_doc_no += int(doc_num[1])

        return [result_list, len(results), len(searcher.search(q2, limit=None)),
                len(relevant_article_ids), total_doc_no]
|
||||
|
||||
def check_sf(corpus_ind_dir,query_list):
    """Return, for each query string, how many index entries contain it.

    Args:
        corpus_ind_dir: directory of the index to open.
        query_list: list of query strings.  Characters other than letters,
            digits, ``_`` and space are removed; multi-word strings are
            searched as phrases.

    Returns:
        A tuple ``(labels, counts)`` of two parallel lists.  The label of a
        multi-word query is its words joined with ``_``; otherwise it is the
        cleaned query string.
    """
    query_l = []
    sf = []
    ix = index.open_dir(corpus_ind_dir)  # load index
    with ix.searcher() as searcher:
        for t in query_list:
            t = re.sub(r'[^a-zA-Z0-9_ ]', '', t)
            splitted = t.split()
            if len(splitted) > 1:
                docfreq = len(searcher.search(query.Phrase("content", splitted), limit=None))
                t = '_'.join(splitted)
            else:
                # Look up the stripped token: a query such as " word" would
                # otherwise never match because of the surrounding space.
                docfreq = searcher.doc_frequency("content", splitted[0] if splitted else t)

            query_l.append(t)
            sf.append(docfreq)
    return (query_l, sf)
|
||||
|
||||
def check_df_year(corpus_ind_dir,query_list,year_from,year_to):
    """Count matching sentences and articles per year for each query.

    Query syntax: ``a+b`` requires every part (AND), ``a/b`` accepts any part
    (OR); anything else is treated as a single term or, when it contains
    several words, as a phrase.

    Args:
        corpus_ind_dir: directory of the index to open.
        query_list: list of query strings.
        year_from: first year (int) to include.
        year_to: last year (int) to include.

    Returns:
        A list ``[sf, rel_article_no, df_f]``:
        ``sf[label][year]`` is the number of matching sentences,
        ``rel_article_no[label]`` the number of distinct matching articles and
        ``df_f[label][year]`` the number of distinct matching articles in that
        year.  ``label`` is the query string with spaces replaced by ``_``.
        All three are empty when ``query_list`` is empty.
    """
    def _clause(raw):
        # Part of an AND/OR expression: punctuation becomes whitespace, then
        # several words form a phrase and a single word a plain term.
        words = re.sub(r'[^a-zA-Z0-9 ]', ' ', raw).split()
        if len(words) > 1:
            return query.Phrase("content", words)
        return query.Term("content", words[0] if words else "")

    sf = {}
    df = {}  # label -> year -> set of article ids
    rel_article_no = {}
    ix = index.open_dir(corpus_ind_dir)  # load index

    q2 = query.Or([query.Term("year", str(y)) for y in range(year_from, year_to + 1)])

    with ix.searcher() as searcher:
        for t in query_list:
            if "+" in t:  # AND
                q1 = query.And([_clause(tx) for tx in t.split("+")])
            elif "/" in t:  # OR
                q1 = query.Or([_clause(tx) for tx in t.split("/")])
            else:  # single term
                t = re.sub(r'[^a-zA-Z0-9_ ]', '', t)
                splitted = t.split()
                if len(splitted) > 1:
                    q1 = query.Phrase("content", splitted)
                    t = '_'.join(splitted)
                else:
                    q1 = query.Term("content", splitted[0] if splitted else t)

            results = searcher.search(query.And([q1, q2]), limit=None)
            label = t.replace(" ", "_")

            sf[label] = {}
            df[label] = {}
            article_ids = set()
            for r in results:
                y = int(r["year"])
                # Sentence ids are "<article id>_<sentence number>"; strip
                # only the trailing number so ids containing "_" stay intact.
                article_id = r["id"].rsplit('_', 1)[0]
                sf[label][y] = sf[label].get(y, 0) + 1
                df[label].setdefault(y, set()).add(article_id)
                article_ids.add(article_id)

            rel_article_no[label] = len(article_ids)

    df_f = {label: {y: len(ids) for y, ids in per_year.items()}
            for label, per_year in df.items()}

    return [sf, rel_article_no, df_f]
|
||||
|
||||
|
||||
def check_tf_year(corpus_ind_dir,query_list):
    """Return a frequency for every query string.

    For a single word this is the value reported by the searcher's
    ``frequency`` for the ``content`` field; for a multi-word phrase it is the
    number of index entries (sentences) matching the phrase.

    Args:
        corpus_ind_dir: directory of the index to open.
        query_list: list of query strings.  Characters other than letters,
            digits, ``_`` and space are removed first.

    Returns:
        Dict mapping each label to its frequency.  The label of a phrase is
        its words joined with ``_``; otherwise it is the cleaned query string.
    """
    tf = {}
    ix = index.open_dir(corpus_ind_dir)  # load index
    with ix.searcher() as searcher:
        for t in query_list:
            t = re.sub(r'[^a-zA-Z0-9_ ]', '', t)
            splitted = t.split()
            if len(splitted) > 1:
                # this is sentence frequency
                results = searcher.search(query.Phrase("content", splitted), limit=None)
                tf['_'.join(splitted)] = len(results)
            else:
                # Look up the stripped token: a query such as " word" would
                # otherwise never match because of the surrounding space.
                tf[t] = searcher.frequency("content", splitted[0] if splitted else t)

    return tf
|
||||
|
||||
# find sentence frequency for a given list of query with AND: group_all==True, OR: group_all==False
|
||||
def check_group_sf_year(corpus_ind_dir,query_list,group_all):
    """Count, per year, the sentences and articles matching a group of queries.

    Each entry of ``query_list`` may be ``a+b`` (all parts required),
    ``a/b`` (any part accepted), a single word, or a multi-word phrase.

    Args:
        corpus_ind_dir: directory of the index to open.
        query_list: list of query strings forming the group.
        group_all: when true the entries are combined with AND, otherwise
            with OR.

    Returns:
        A tuple ``(sf, df)`` of dicts keyed by year (int): ``sf[year]`` is the
        number of matching sentences, ``df[year]`` the number of distinct
        articles those sentences belong to.
    """
    def _clause(raw):
        # Punctuation becomes whitespace; several words form a phrase and a
        # single word a plain term (stripped of surrounding spaces).
        words = re.sub(r'[^a-zA-Z0-9 ]', ' ', raw).split()
        if len(words) > 1:
            return query.Phrase("content", words)
        return query.Term("content", words[0] if words else "")

    ix = index.open_dir(corpus_ind_dir)  # load index

    with ix.searcher() as searcher:
        term_list_T = []
        for t in query_list:
            if "+" in t:  # AND
                term_list_T.append(query.And([_clause(tx) for tx in t.split("+")]))
            elif "/" in t:  # OR
                term_list_T.append(query.Or([_clause(tx) for tx in t.split("/")]))
            else:  # single term
                term_list_T.append(_clause(t))

        q = query.And(term_list_T) if group_all else query.Or(term_list_T)
        results = searcher.search(q, limit=None)

        sf = {}
        articles_by_year = {}
        for r in results:
            y = int(r['year'])
            # Sentence ids are "<article id>_<sentence number>"; strip only
            # the trailing number so article ids containing "_" stay intact.
            article_id = r["id"].rsplit('_', 1)[0]
            sf[y] = sf.get(y, 0) + 1
            articles_by_year.setdefault(y, set()).add(article_id)

        df = {y: len(ids) for y, ids in articles_by_year.items()}

    return (sf, df)
|
||||
|
||||
def get_num_doc_year(corpus_ind_dir):
    """Read the ``doc_num`` file of an index directory.

    Every line holding at least two whitespace-separated values contributes
    one entry: first value -> second value, both kept as strings.  Lines with
    fewer values are ignored.

    Args:
        corpus_ind_dir: directory containing the ``doc_num`` file.

    Returns:
        Dict mapping the first value of each line to the second.
    """
    counts = {}
    with open(corpus_ind_dir+"/doc_num") as handle:
        for raw in handle.readlines():
            parts = raw.strip().split()
            if len(parts) < 2:
                continue
            counts[parts[0]] = parts[1]
    return counts
|
||||
|
||||
def get_doc_len_freq(corpus_ind_dir):
    """Read the ``doc_len`` file of an index directory.

    Every line holding at least two whitespace-separated values contributes
    one entry, both values converted to ``int``.  Lines with fewer values are
    ignored.

    Args:
        corpus_ind_dir: directory containing the ``doc_len`` file.

    Returns:
        Dict mapping the first number of each line to the second.
    """
    histogram = {}
    with open(corpus_ind_dir+"/doc_len") as handle:
        for raw in handle.readlines():
            parts = raw.strip().split()
            if len(parts) < 2:
                continue
            histogram[int(parts[0])] = int(parts[1])
    return histogram
|
||||
|
||||
def top_terms(corpus_ind_dir,top_n):
    """Return the ``top_n`` most frequent terms of the ``content`` field.

    Args:
        corpus_ind_dir: directory of the index to open.
        top_n: number of terms to request from the index reader.

    Returns:
        Dict mapping each term (decoded from UTF-8) to the frequency value
        reported by the reader.
    """
    ix = index.open_dir(corpus_ind_dir)  # load index
    reader = ix.reader()
    try:
        tops = reader.most_frequent_terms('content', number=top_n)
        # Entries are read as (frequency, term bytes) pairs.
        return {str(term.decode("utf-8")): freq for freq, term in tops}
    finally:
        # Release the reader; it was previously left open.
        reader.close()
|
||||
|
||||
def field_top_terms(corpus_ind_dir,field,top_n=20):
    """Return the ``top_n`` most frequent terms of an arbitrary index field.

    Args:
        corpus_ind_dir: directory of the index to open.
        field: name of the indexed field to inspect.
        top_n: number of terms to request from the index reader.

    Returns:
        Dict mapping each term (decoded from UTF-8) to the frequency value
        reported by the reader.
    """
    ix = index.open_dir(corpus_ind_dir)  # load index
    reader = ix.reader()
    try:
        tops = reader.most_frequent_terms(field, number=top_n)
        # Entries are read as (frequency, term bytes) pairs.
        return {str(term.decode("utf-8")): freq for freq, term in tops}
    finally:
        # Release the reader; it was previously left open.
        reader.close()
|
||||
|
||||
def get_fieldnames(corpus_ind_dir):
    """List the stored field names of an index, minus the built-in ones.

    The names ``content``, ``id`` and ``title`` are removed from the list
    returned by the schema's ``stored_names()``; the rest is returned as is.

    Args:
        corpus_ind_dir: directory of the index to open.

    Returns:
        List of the remaining stored field names.
    """
    schema = index.open_dir(corpus_ind_dir).schema
    names = schema.stored_names()
    for builtin in ('content', 'id', 'title'):
        names.remove(builtin)
    return names
|
||||
0
whoosh_search/sample_data_index/MAIN_WRITELOCK
Executable file
0
whoosh_search/sample_data_index/MAIN_WRITELOCK
Executable file
BIN
whoosh_search/sample_data_index/MAIN_c310e1eqhhnzobbk.seg
Normal file
BIN
whoosh_search/sample_data_index/MAIN_c310e1eqhhnzobbk.seg
Normal file
Binary file not shown.
BIN
whoosh_search/sample_data_index/_MAIN_2.toc
Normal file
BIN
whoosh_search/sample_data_index/_MAIN_2.toc
Normal file
Binary file not shown.
38
whoosh_search/sample_data_index/doc_len
Normal file
38
whoosh_search/sample_data_index/doc_len
Normal file
@@ -0,0 +1,38 @@
|
||||
105 1
|
||||
110 1
|
||||
119 1
|
||||
140 1
|
||||
153 1
|
||||
156 1
|
||||
157 1
|
||||
158 2
|
||||
160 1
|
||||
161 1
|
||||
164 1
|
||||
165 1
|
||||
166 1
|
||||
168 1
|
||||
169 1
|
||||
171 1
|
||||
173 1
|
||||
177 1
|
||||
181 2
|
||||
182 1
|
||||
184 1
|
||||
185 1
|
||||
189 1
|
||||
194 1
|
||||
203 1
|
||||
206 1
|
||||
211 1
|
||||
215 1
|
||||
227 1
|
||||
228 1
|
||||
238 1
|
||||
254 1
|
||||
275 1
|
||||
278 1
|
||||
300 1
|
||||
311 1
|
||||
356 1
|
||||
455 1
|
||||
8
whoosh_search/sample_data_index/doc_num
Normal file
8
whoosh_search/sample_data_index/doc_num
Normal file
@@ -0,0 +1,8 @@
|
||||
40
|
||||
2012 6
|
||||
2013 7
|
||||
2014 4
|
||||
2015 5
|
||||
2016 5
|
||||
2017 7
|
||||
2018 6
|
||||
1
whoosh_search/sample_data_index/groups/SSM Test
Normal file
1
whoosh_search/sample_data_index/groups/SSM Test
Normal file
@@ -0,0 +1 @@
|
||||
{"base": "marriage", "added": ["sex", "same", "couple", "couples", "same sex", "marriages"], "groups": [[["same", "sex", "same sex"], ["marriage", "marriages"], ["same/sex/same sex", "marriage/marriages"], ["couple", "couples"], ["same/sex/same sex", "couple/couples"]], ["same+sex", "marriage", "SSM", "couple", "SSC"], [false, false, true, false, true], ["", "", "True", "", "True"]], "year": [2012, 2018]}
|
||||
Reference in New Issue
Block a user