First Commit ✔

This commit is contained in:
silverstone1903
2022-03-07 16:16:25 +03:00
commit b03365fcf4
11 changed files with 425 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@@ -0,0 +1,7 @@
.ipynb_checkpoints
*/.ipynb_checkpoints/*
.Rhistory
*.R
*.ipynb
*.feather
*/*.feather

14
Dockerfile Normal file
View File

@@ -0,0 +1,14 @@
# Image for the FastAPI search app. It also carries the data, templates and
# tests so that indexing and pytest can run inside the container.
FROM python:3.7-slim
WORKDIR /app
# Do not write .pyc files, and keep stdout/stderr unbuffered so logs show up
# immediately in `docker-compose logs`.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# Install dependencies before copying the sources: this layer is then reused
# from cache until requirements.txt itself changes.
COPY ./app/requirements.txt ./app/requirements.txt
RUN pip install --no-cache-dir -r app/requirements.txt
COPY ./app ./app
COPY ./data ./data
COPY ./templates ./templates
COPY ./tests ./tests

89
app/db.py Normal file
View File

@@ -0,0 +1,89 @@
import warnings

import pandas as pd
from elasticsearch import Elasticsearch, helpers
from elasticsearch.exceptions import NotFoundError, RequestError
warnings.filterwarnings("ignore")
# Name of the Elasticsearch index that holds the movie titles.
index_name = "movies"

# Client for the Elasticsearch node published on the Docker host.
es = Elasticsearch(
    hosts=[{"scheme": "http", "host": "host.docker.internal", "port": 9200}],
    max_retries=30,
    retry_on_timeout=True,
    request_timeout=30,
)

# Abort early when the node does not answer a ping.
if es.ping():
    print("Successfully connected to Elasticsearch.")
else:
    raise ValueError("Connection failed.")

# Source data; the "movie_name" column is what gets indexed further down.
movies = pd.read_feather("/app/data/movies.feather")
# Token filters referenced by the "autocomplete" analyzer below.
_token_filters = {
    "my_ascii_folding": {
        "type": "asciifolding",
        "preserve_original": "true",
    },
    "autocomplete_filter": {
        "type": "edge_ngram",
        "min_gram": 1,
        "max_gram": 10,
    },
    "turkish_lowercase": {
        "type": "lowercase",
        "language": "turkish",
    },
    "turkish_stemmer": {
        "type": "stemmer",
        "language": "turkish",
    },
}

# Index-time analyzer: standard tokenizer followed by the filters above,
# applied in the listed order.
_autocomplete_analyzer = {
    "type": "custom",
    "tokenizer": "standard",
    "filter": [
        "turkish_lowercase",
        "turkish_stemmer",
        "autocomplete_filter",
        "my_ascii_folding",
    ],
}

# Full index definition (settings + mappings) used when creating the index.
# The "movies" text field is analyzed with "autocomplete" at index time and
# with the built-in "standard" analyzer at search time.
index = {
    "settings": {
        "analysis": {
            "filter": _token_filters,
            "analyzer": {"autocomplete": _autocomplete_analyzer},
        }
    },
    "mappings": {
        "properties": {
            "movies": {
                "type": "text",
                "analyzer": "autocomplete",
                "search_analyzer": "standard",
            }
        }
    },
}
def dataframe_to_es(df, es_index):
    """Yield one bulk-index action per entry of ``df``.

    Args:
        df: pandas object iterated with ``.items()``. The caller in this
            script passes a single column (a Series), so each item is an
            ``(index label, value)`` pair.
        es_index: name of the target Elasticsearch index.

    Yields:
        dict: an action whose ``_id`` is the index label and whose
        ``_source`` stores the value under the ``"movies"`` field.
    """
    # ``items()`` replaces ``iteritems()``, which was deprecated in
    # pandas 1.5 and removed in pandas 2.0; both yield the same pairs.
    for df_idx, line in df.items():
        yield {
            "_index": es_index,
            "_id": df_idx,
            # NOTE(review): "type" lacks the leading underscore used by the
            # other metadata keys; kept unchanged - confirm whether needed.
            "type": "_doc",
            "_source": {
                "movies": line
            }
        }
# Rebuild the index from scratch: drop the previous copy (if any), create it
# with the settings/mappings defined above, then bulk-load the movie names.
try:
    # The 8.x client rejects positional arguments on API methods, so the
    # index must be passed by keyword. Only "index does not exist" is
    # handled here; any other failure should surface instead of being hidden.
    es.indices.delete(index=index_name)
except NotFoundError:
    print("There is no index called %s." % index_name)
print("Creating index %s." % index_name)
# ignore=400 is kept from the original call, so a 400 response from the
# create request does not abort the script.
es.indices.create(index=index_name, ignore=400, body=index)
# With raise_on_error=False, bulk() returns (success_count, error_items)
# instead of raising, so report what was actually indexed.
indexed_count, bulk_errors = helpers.bulk(
    es,
    dataframe_to_es(movies["movie_name"], index_name),
    raise_on_error=False,
)
if bulk_errors:
    print("Failed to index %s rows." % len(bulk_errors))
print("Indexing complete & Indexed %s rows." % indexed_count)

62
app/main.py Normal file
View File

@@ -0,0 +1,62 @@
from operator import index
from fastapi.templating import Jinja2Templates
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from elasticsearch import Elasticsearch
from pydantic import BaseModel
from fastapi import FastAPI, Request
import warnings
warnings.filterwarnings("ignore")
# Jinja2 environment for the HTML search page.
templates = Jinja2Templates(directory="./templates")
app = FastAPI(title="Data API")

# Elasticsearch client. `hosts` is given as a list of node mappings, the same
# form app/db.py uses, rather than a bare positional dict.
es = Elasticsearch(
    hosts=[{"scheme": "http", "host": "host.docker.internal", "port": 9200}],
    max_retries=30,
    retry_on_timeout=True,
    request_timeout=30,
)

# Fail at import time when the node does not answer a ping.
if not es.ping():
    raise ValueError("Connection failed")
else:
    print("Successfully connected to Elasticsearch.")

# Index queried by the /match endpoint.
index_name = "movies"

# CORS: every origin, method and header is allowed.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.get('/health', status_code=200, summary="Returns HC Page.", tags=["hc"])
async def home():
    # Health check: always answers with the same fixed payload.
    payload = {"message": "Still Alive"}
    return payload
@app.get("/", status_code=200, summary="Returns Search Page.", tags=["search"])
def root(request: Request):
    # Render the search page; the template context carries the request and a
    # placeholder `result` string.
    context = {'request': request, 'result': "Type a number"}
    return templates.TemplateResponse('home.html', context=context)
@app.get('/match', status_code=200, summary="Returns Matches.", tags=["search"])
async def match(term: str):
    """Return the movie titles matching ``term``.

    Runs a fuzzy ``match`` query on the ``movies`` field of the index.

    Args:
        term: text typed by the user.

    Returns:
        A list with the ``movies`` value of every returned hit, or the
        string "Sonuç Bulunamadı" when the query matches nothing.
    """
    body = {
        "query": {
            "match": {
                "movies": {
                    "query": term,
                    "fuzziness": "auto"
                }
            }
        }
    }
    # NOTE(review): this is a blocking call inside an async handler.
    res = es.search(index=index_name, body=body)
    hits = res["hits"]
    if hits["total"]["value"] > 0:
        # Return every hit. The previous `[0:-1]` slice dropped the last one,
        # so a single-hit search produced an empty list.
        return [hit["_source"]["movies"] for hit in hits["hits"]]
    return "Sonuç Bulunamadı"

9
app/requirements.txt Normal file
View File

@@ -0,0 +1,9 @@
elasticsearch==8.0.0
# elasticsearch-dsl==7.4.0
requests==2.27.1
fastapi==0.74.0
uvicorn==0.17.5
pytest==6.2.5
pandas==1.3.5
pyarrow==5.0.0
jinja2==3.0.1

52
docker-compose.yml Normal file
View File

@@ -0,0 +1,52 @@
version: "3.5"

services:
  # Single-node Elasticsearch with security disabled, published on 9200.
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.0.0
    container_name: es
    restart: always
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - 'ES_JAVA_OPTS=-Xmx128m -Xms128m'
      - bootstrap.memory_lock=true
    ports:
      - 9200:9200
    networks:
      - es-kib
    healthcheck:
      test:
        [
          "CMD",
          "curl",
          "-f",
          "http://es:9200/_cluster/health?pretty"
        ]
      interval: 30s
      timeout: 10s
      retries: 3

  kibana:
    image: docker.elastic.co/kibana/kibana:8.0.0
    container_name: kibana
    environment:
      - ELASTICSEARCH_HOSTS=http://es:9200
      - xpack.security.enabled=false
    ports:
      - "5601:5601"
    networks:
      - es-kib
    depends_on:
      - elasticsearch

  # FastAPI app. It reaches Elasticsearch through host.docker.internal:9200
  # (the port published above), not through the es-kib network.
  web:
    build: .
    container_name: app
    restart: on-failure
    # depends_on takes service names: the service is `elasticsearch`;
    # `es` is only its container_name.
    depends_on:
      - elasticsearch
    # Wait for Elasticsearch to come up, load the index, then serve the API.
    command: >
      sh -c "sleep 30; python app/db.py && uvicorn app.main:app --host 0.0.0.0 --port 8000"
    ports:
      - 8000:8000

networks:
  es-kib:
    driver: bridge

78
readme.md Normal file
View File

@@ -0,0 +1,78 @@
# Turkish Movie Search Engine
<br>
<p style="text-align:center">
<img src="https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png" width="200" >
<img src="https://plugins.jetbrains.com/files/16111/151977/icon/pluginIcon.png" width="100">
<img src="https://www.bujarra.com/wp-content/uploads/2018/11/kibana0.jpg" width="100" >
<img src="https://devnot.com/wp-content/uploads/2017/09/docker-compose.jpg" width="200" >
</p>
<br>
<br>
<center>
![](https://im2.ezgif.com/tmp/ezgif-2-d2bcaa85eb.gif)
</center>
This repository contains a template for using a FastAPI backend with Elasticsearch & Kibana.
Data source: [Turkish Movie Sentiment Analysis Dataset](https://www.kaggle.com/mustfkeskin/turkish-movie-sentiment-analysis-dataset)
* Only the unique movie names from the dataset are used.
## Installation
There are only two prerequisites:
* [Docker](https://docs.docker.com/get-docker/)
* [Docker-compose](https://docs.docker.com/compose/install/)
<br>
``` bash
git clone https://github.com/silverstone1903/autocomplete-search-fastapi-es-kibana
```
## Usage
### Start
``` bash
docker-compose up -d
```
If you make any changes, add `--build` to rebuild the image.
``` bash
docker-compose up --build -d
```
### Stopping containers
``` bash
docker-compose down
```
### Container Logs
When containers run in detached mode (`-d`), they work in the background, so you can't see the logs as they are produced. To check the compose logs from the CLI, use `logs`.
``` bash
docker-compose logs --tail 50
```
* FastAPI (UI): http://localhost:8000
* Elasticsearch: http://localhost:9200
* Kibana: http://localhost:5601
# Tests
To run the tests inside the container:
```bash
docker-compose exec web pytest tests -sv
```
# Sources
* [DataAPI](https://github.com/naciyuksel/DataAPI)
* [Fast Autocomplete Search for Your Website](https://github.com/simonw/24ways-datasette)
* [Elastic.co](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html#turkish-analyzer)

88
templates/home.html Normal file
View File

@@ -0,0 +1,88 @@
<!DOCTYPE html>
<html lang="en" dir="ltr">
<!-- https://github.com/simonw/24ways-datasette/blob/master/index.html -->
<head>
<meta charset="utf-8">
<title>Türkçe Film Arama Motoru</title>
<style>
body {
font-family: helvetica, sans-serif;
line-height: 1.4;
}
</style>
</head>
<body>
<center>
<h1>Film Arama Motoru</h1>
<form>
<p><input id="searchbox" type="text" aria-label="Film adını giriniz" placeholder="Film Adı" style="width: 60%"></p>
</form>
<div id="results"></div>
<script>
// Search-as-you-type: on every (debounced) key release, query the /match
// endpoint and render the returned titles into #results.
const searchbox = document.getElementById("searchbox");
// Marker object of the most recently issued request; used to discard
// responses that arrive after a newer request has been sent.
let requestInFlight = null;
searchbox.onkeyup = debounce(() => {
    const q = searchbox.value;
    // Relative URL: the page is served by the same app that exposes /match,
    // so this works from whichever host/port the page was loaded from.
    const url = "/match?term=" + encodeURIComponent(q);
    let currentRequest = {};
    requestInFlight = currentRequest;
    fetch(url).then(r => r.json()).then(d => {
        if (requestInFlight !== currentRequest) {
            // A newer request was issued meanwhile; drop this stale response.
            return;
        }
        let results = "";
        if (Array.isArray(d)) {
            // Titles are escaped before being inserted as HTML.
            results = d.map(r => `
                <div class="result">
                    <h3>${htmlEscape(String(r))}</h3>
                </div>
            `).join("");
        } else if (typeof d === "string") {
            // The API answers with a plain string when nothing matches.
            results = `<p>${htmlEscape(d)}</p>`;
        }
        document.getElementById("results").innerHTML = results;
    });
}, 100);
// Wraps `func` so that it runs only after `wait` ms have passed since the
// last call. When `immediate` is truthy, `func` runs on the first call of a
// burst instead of after it.
function debounce(func, wait, immediate) {
    let timerId;
    return function (...callArgs) {
        const self = this;
        // Decide before re-arming the timer whether this is a leading call.
        const fireNow = immediate && !timerId;
        clearTimeout(timerId);
        timerId = setTimeout(function () {
            timerId = null;
            if (!immediate) {
                func.apply(self, callArgs);
            }
        }, wait);
        if (fireNow) {
            func.apply(self, callArgs);
        }
    };
}
// HTML-escapes `s`, then turns the two marker tokens into <b> and </b>.
const highlight = (s) => {
    const escaped = htmlEscape(s);
    const opened = escaped.replace(/b4de2a49c8/g, '<b>');
    return opened.replace(/8c94a2ed4b/g, '</b>');
};
// Replaces the five HTML-significant characters in `s` with their entities.
const htmlEscape = (s) => s.replace(/[&<>"']/g, (ch) => {
    switch (ch) {
        case '&': return '&amp;';
        case '>': return '&gt;';
        case '<': return '&lt;';
        case '"': return '&quot;';
        default: return '&#039;';
    }
});
</script>
</center>
</body>
</html>

0
tests/__init__.py Normal file
View File

2
tests/pytest.ini Normal file
View File

@@ -0,0 +1,2 @@
[pytest]
addopts = -p no:warnings

24
tests/test_main.py Normal file
View File

@@ -0,0 +1,24 @@
import pytest
import json
import time
from fastapi.testclient import TestClient
from app.main import app
client = TestClient(app)
def test_main():
    """The health endpoint answers 200 with its fixed payload."""
    health = client.get("/health")
    assert health.status_code == 200
    expected = {"message": "Still Alive"}
    assert health.json() == expected
def test_get_empty_data():
    """An empty search term yields the no-result message."""
    empty_search = client.get("/match?term=")
    assert empty_search.status_code == 200
    no_result_message = "Sonuç Bulunamadı"
    assert empty_search.json() == no_result_message
def test_get_data():
    """A known term returns a non-empty list of titles."""
    response = client.get("/match?term=baba")
    assert response.status_code == 200
    results = response.json()
    # The no-result fallback is a string, which would also satisfy `!= []`;
    # require an actual non-empty list so that path cannot pass the test.
    assert isinstance(results, list)
    assert results