mirror of
https://github.com/silverstone1903/autocomplete-search-fastapi-es-kibana.git
synced 2022-03-07 18:31:58 +03:00
First Commit ✔
This commit is contained in:
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
.ipynb_checkpoints
|
||||
*/.ipynb_checkpoints/*
|
||||
.Rhistory
|
||||
*.R
|
||||
*.ipynb
|
||||
*.feather
|
||||
*/*.feather
|
||||
14
Dockerfile
Normal file
14
Dockerfile
Normal file
@@ -0,0 +1,14 @@
|
||||
# Image for the FastAPI search app and the indexing script (app/db.py).
FROM python:3.7-slim

WORKDIR /app

# Do not write .pyc files, and keep stdout/stderr unbuffered so output
# reaches the container logs immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install dependencies before copying the sources: this layer is then only
# rebuilt when requirements.txt changes, not on every code edit.
COPY ./app/requirements.txt ./app/requirements.txt
RUN pip install --no-cache-dir -r app/requirements.txt

COPY ./app ./app
COPY ./data ./data
COPY ./templates ./templates
COPY ./tests ./tests
|
||||
|
||||
|
||||
89
app/db.py
Normal file
89
app/db.py
Normal file
@@ -0,0 +1,89 @@
|
||||
from elasticsearch import Elasticsearch, helpers
|
||||
from elasticsearch.exceptions import RequestError
|
||||
import pandas as pd
|
||||
import warnings
|
||||
# Silence every warning category for the rest of this script.
warnings.filterwarnings("ignore")

# Name of the Elasticsearch index this script creates and fills.
index_name = "movies"

# Client for the node reached through host.docker.internal:9200.
es = Elasticsearch(
    hosts=[{"scheme": "http", "host": "host.docker.internal", "port": 9200}],
    max_retries=30,
    retry_on_timeout=True,
    request_timeout=30,
)

# Abort early when the node does not answer the ping.
if es.ping():
    print("Successfully connected to Elasticsearch.")
else:
    raise ValueError("Connection failed.")

# Source table; its "movie_name" column is what gets indexed further down.
movies = pd.read_feather("/app/data/movies.feather")
|
||||
|
||||
# Token filters referenced by the custom "autocomplete" analyzer.
_token_filters = dict(
    my_ascii_folding=dict(type="asciifolding", preserve_original="true"),
    autocomplete_filter=dict(type="edge_ngram", min_gram=1, max_gram=10),
    turkish_lowercase=dict(type="lowercase", language="turkish"),
    turkish_stemmer=dict(type="stemmer", language="turkish"),
)

# Index-time analyzer: standard tokenizer followed by the filters above,
# applied in the listed order.
_autocomplete_analyzer = dict(
    type="custom",
    tokenizer="standard",
    filter=[
        "turkish_lowercase",
        "turkish_stemmer",
        "autocomplete_filter",
        "my_ascii_folding",
    ],
)

# Request body for index creation: analysis settings plus the mapping of the
# single "movies" text field (custom analyzer at index time, "standard" at
# search time).
index = dict(
    settings=dict(
        analysis=dict(
            filter=_token_filters,
            analyzer=dict(autocomplete=_autocomplete_analyzer),
        ),
    ),
    mappings=dict(
        properties=dict(
            movies=dict(
                type="text",
                analyzer="autocomplete",
                search_analyzer="standard",
            ),
        ),
    ),
)
|
||||
|
||||
|
||||
def dataframe_to_es(df, es_index):
    """Yield one bulk-index action per entry of ``df``.

    Args:
        df: pandas object iterated as ``(label, value)`` pairs; the caller in
            this file passes a single column (a Series).
        es_index: Name of the target index, copied into every action.

    Yields:
        dict: Action with ``_index``, ``_id`` (the entry's label), ``type``
        and a ``_source`` document holding the value under ``"movies"``.
    """
    # Series.items() replaces iteritems(), which pandas deprecated in 1.5 and
    # removed in 2.0; both produce the same (label, value) pairs.
    for df_idx, line in df.items():
        yield {
            "_index": es_index,
            "_id": df_idx,
            # NOTE(review): "type" has no leading underscore, so it is
            # presumably not treated as bulk metadata -- confirm and drop.
            "type": "_doc",
            "_source": {"movies": line},
        }
|
||||
|
||||
|
||||
# Start from a clean slate: drop the index left over from a previous run.
# The 8.x client accepts keyword arguments only; the former positional call
# raised a TypeError that a bare `except:` swallowed, so the old index was
# never actually removed.
if es.indices.exists(index=index_name):
    es.indices.delete(index=index_name)
else:
    print("There is no index called %s." % index_name)

print("Creating index %s." % index_name)
# ignore=400 suppresses HTTP 400 responses from the create call.
es.indices.create(index=index_name, ignore=400, body=index)

# raise_on_error=False collects per-document failures instead of aborting;
# bulk() then returns (number of successes, list of errors).
indexed, errors = helpers.bulk(
    es,
    dataframe_to_es(movies["movie_name"], index_name),
    raise_on_error=False,
)
if errors:
    print("Failed to index %s rows." % len(errors))

# Report what was actually written rather than the size of the input table.
print("Indexing complete & Indexed %s rows." % indexed)
|
||||
62
app/main.py
Normal file
62
app/main.py
Normal file
@@ -0,0 +1,62 @@
|
||||
from operator import index
|
||||
from fastapi.templating import Jinja2Templates
|
||||
from fastapi.responses import HTMLResponse
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from elasticsearch import Elasticsearch
|
||||
from pydantic import BaseModel
|
||||
from fastapi import FastAPI, Request
|
||||
import warnings
|
||||
# Silence every warning category for the lifetime of the process.
warnings.filterwarnings("ignore")

# Page templates are looked up relative to the working directory.
templates = Jinja2Templates(directory="./templates")
app = FastAPI(title="Data API")

# `hosts` takes a list of node definitions; a bare dict passed positionally
# is not one of the documented forms. Same call shape as app/db.py.
es = Elasticsearch(
    hosts=[{"scheme": "http", "host": "host.docker.internal", "port": 9200}],
    max_retries=30,
    retry_on_timeout=True,
    request_timeout=30,
)

# Refuse to start the API when Elasticsearch does not answer the ping.
if not es.ping():
    raise ValueError("Connection failed")
print("Successfully connected to Elasticsearch.")

# Index queried by the /match endpoint.
index_name = "movies"

# CORS: every origin, method and header is accepted.
origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||
|
||||
|
||||
@app.get('/health', status_code=200, summary="Returns HC Page.", tags=["hc"])
async def home():
    # Health check: fixed payload, no Elasticsearch call involved.
    status = {"message": "Still Alive"}
    return status
|
||||
|
||||
|
||||
@app.get("/", status_code=200, summary="Returns Search Page.", tags=["search"])
def root(request: Request):
    # Render the search page; the template context carries the request
    # object plus a fixed `result` string.
    page_context = {'request': request, 'result': "Type a number"}
    return templates.TemplateResponse('home.html', context=page_context)
|
||||
|
||||
|
||||
@app.get('/match', status_code=200, summary="Returns Matches.", tags=["search"])
async def match(term: str):
    # Fuzzy match of `term` against the "movies" field of the index.
    # Returns the matched titles as a list, or the "Sonuç Bulunamadı"
    # string when the search reports no hits.
    body = {
        "query": {
            "match": {
                "movies": {
                    "query": term,
                    "fuzziness": "auto",
                }
            }
        }
    }
    res = es.search(index=index_name, body=body)
    if res["hits"]["total"]["value"] > 0:
        # Return every hit: the former [0:-1] slice dropped the last one, so
        # a search with exactly one match came back as an empty list.
        return [hit["_source"]["movies"] for hit in res["hits"]["hits"]]
    return "Sonuç Bulunamadı"
|
||||
9
app/requirements.txt
Normal file
9
app/requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
||||
elasticsearch==8.0.0
|
||||
# elasticsearch-dsl==7.4.0
|
||||
requests==2.27.1
|
||||
fastapi==0.74.0
|
||||
uvicorn==0.17.5
|
||||
pytest==6.2.5
|
||||
pandas==1.3.5
|
||||
pyarrow==5.0.0
|
||||
jinja2==3.0.1
|
||||
52
docker-compose.yml
Normal file
52
docker-compose.yml
Normal file
@@ -0,0 +1,52 @@
|
||||
version: "3.5"

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:8.0.0
    container_name: es
    restart: always
    environment:
      - discovery.type=single-node
      - xpack.security.enabled=false
      - 'ES_JAVA_OPTS=-Xmx128m -Xms128m'
      - bootstrap.memory_lock=true
    ports:
      - 9200:9200
    networks:
      - es-kib
    healthcheck:
      test:
        [
          "CMD",
          "curl",
          "-f",
          "http://es:9200/_cluster/health?pretty"
        ]
      interval: 30s
      timeout: 10s
      retries: 3
  kibana:
    image: docker.elastic.co/kibana/kibana:8.0.0
    container_name: kibana
    environment:
      - ELASTICSEARCH_HOSTS=http://es:9200
      - xpack.security.enabled=false
    ports:
      - "5601:5601"
    networks:
      - es-kib
    depends_on:
      - elasticsearch
  web:
    build: .
    container_name: app
    restart: on-failure
    depends_on:
      # depends_on takes service names; "es" is only the container_name of
      # the elasticsearch service.
      - elasticsearch
    command: >
      sh -c "sleep 30; python app/db.py && uvicorn app.main:app --host 0.0.0.0 --port 8000"
    ports:
      - 8000:8000

networks:
  es-kib:
    driver: bridge
|
||||
78
readme.md
Normal file
78
readme.md
Normal file
@@ -0,0 +1,78 @@
|
||||
# Turkish Movie Search Engine
|
||||
|
||||
<br>
|
||||
|
||||
<p style="text-align:center">
|
||||
<img src="https://fastapi.tiangolo.com/img/logo-margin/logo-teal.png" width="200" >
|
||||
<img src="https://plugins.jetbrains.com/files/16111/151977/icon/pluginIcon.png" width="100">
|
||||
<img src="https://www.bujarra.com/wp-content/uploads/2018/11/kibana0.jpg" width="100" >
|
||||
<img src="https://devnot.com/wp-content/uploads/2017/09/docker-compose.jpg" width="200" >
|
||||
|
||||
</p>
|
||||
<br>
|
||||
<br>
|
||||
<center>
|
||||
|
||||

|
||||
</center>
|
||||
|
||||
This repository contains a template for using a FastAPI backend with Elasticsearch & Kibana.
|
||||
|
||||
Data source: [Turkish Movie Sentiment Analysis Dataset](https://www.kaggle.com/mustfkeskin/turkish-movie-sentiment-analysis-dataset)
|
||||
* I just selected unique movie names.
|
||||
|
||||
## Installation
|
||||
|
||||
There are only two prerequisites:
|
||||
|
||||
* [Docker](https://docs.docker.com/get-docker/)
|
||||
* [Docker-compose](https://docs.docker.com/compose/install/)
|
||||
|
||||
<br>
|
||||
|
||||
``` bash
|
||||
git clone https://github.com/silverstone1903/autocomplete-search-fastapi-es-kibana
|
||||
```
|
||||
|
||||
## Usage
|
||||
### Start
|
||||
|
||||
``` bash
|
||||
docker-compose up -d
|
||||
```
|
||||
|
||||
If you make any changes, add `--build` to rebuild the image.
|
||||
|
||||
``` bash
|
||||
docker-compose up --build -d
|
||||
```
|
||||
|
||||
### Stopping containers
|
||||
|
||||
``` bash
|
||||
docker-compose down
|
||||
```
|
||||
### Container Logs
|
||||
When containers run in detached mode (`-d`), they work in the background, so you can't see their logs as they are written. To check the compose logs from the CLI, use `logs`.
|
||||
|
||||
``` bash
|
||||
docker-compose logs --tail 50
|
||||
```
|
||||
|
||||
* FastAPI (UI): http://localhost:8000
|
||||
* Elasticsearch: http://localhost:9200
|
||||
* Kibana: http://localhost:5601
|
||||
|
||||
# Tests
|
||||
|
||||
To run the tests inside the container:
|
||||
|
||||
```bash
|
||||
docker-compose exec web pytest tests -sv
|
||||
```
|
||||
|
||||
|
||||
# Sources
|
||||
* [DataAPI](https://github.com/naciyuksel/DataAPI)
|
||||
* [Fast Autocomplete Search for Your Website](https://github.com/simonw/24ways-datasette)
|
||||
* [Elastic.co](https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html#turkish-analyzer)
|
||||
88
templates/home.html
Normal file
88
templates/home.html
Normal file
@@ -0,0 +1,88 @@
|
||||
<!DOCTYPE html>
<html lang="en" dir="ltr">

<!-- https://github.com/simonw/24ways-datasette/blob/master/index.html -->

<head>
  <meta charset="utf-8">
  <title>Türkçe Film Arama Motoru</title>
  <style>
    body {
      font-family: helvetica, sans-serif;
      line-height: 1.4;
    }
  </style>
</head>

<body>
  <center>
    <h1>Film Arama Motoru</h1>

    <!-- Results are fetched while typing; submitting would only reload the page. -->
    <form onsubmit="return false;">
      <p><input id="searchbox" type="text" title="Film adını giriniz" placeholder="Film Adı" style="width: 60%"></p>
    </form>
    <div id="results"></div>

    <script>
      const searchbox = document.getElementById("searchbox");
      const resultsBox = document.getElementById("results");
      // Marker of the newest request; responses to older ones are discarded.
      let requestInFlight = null;

      searchbox.onkeyup = debounce(() => {
        const q = searchbox.value;
        if (!q.trim()) {
          // Nothing to search for: invalidate pending responses, clear the list.
          requestInFlight = null;
          resultsBox.innerHTML = "";
          return;
        }
        // Relative URL, so the page works on whatever host and port serve it.
        const url = "/match?term=" + encodeURIComponent(q);
        const currentRequest = {};
        requestInFlight = currentRequest;
        fetch(url).then(r => r.json()).then(d => {
          if (requestInFlight !== currentRequest) {
            return;
          }
          // The API answers with a list of titles, or with a single message
          // string when nothing matched; show either as result rows.
          const items = Array.isArray(d) ? d : [d];
          resultsBox.innerHTML = items.map(r => `
            <div class="result">
              <h3>${htmlEscape(String(r))}</h3>
            </div>
          `).join("");
        });
      }, 100);

      // Delay calls to `func` until `wait` ms have passed without a new call;
      // with `immediate`, fire on the leading edge instead of the trailing one.
      function debounce(func, wait, immediate) {
        let timeout;
        return function () {
          let context = this, args = arguments;
          let later = () => {
            timeout = null;
            if (!immediate) func.apply(context, args);
          };
          let callNow = immediate && !timeout;
          clearTimeout(timeout);
          timeout = setTimeout(later, wait);
          if (callNow) func.apply(context, args);
        };
      };

      // Escape `s`, then turn the two marker tokens into <b> ... </b>.
      const highlight = (s) => htmlEscape(s).replace(
        /b4de2a49c8/g, '<b>'
      ).replace(
        /8c94a2ed4b/g, '</b>'
      );

      // Replace the characters that are special in HTML with entities.
      const htmlEscape = (s) => s.replace(
        /&/g, '&amp;'
      ).replace(
        />/g, '&gt;'
      ).replace(
        /</g, '&lt;'
      ).replace(
        /"/g, '&quot;'
      ).replace(
        /'/g, '&#039;'
      );
    </script>
  </center>
</body>

</html>
|
||||
0
tests/__init__.py
Normal file
0
tests/__init__.py
Normal file
2
tests/pytest.ini
Normal file
2
tests/pytest.ini
Normal file
@@ -0,0 +1,2 @@
|
||||
[pytest]
|
||||
addopts = -p no:warnings
|
||||
24
tests/test_main.py
Normal file
24
tests/test_main.py
Normal file
@@ -0,0 +1,24 @@
|
||||
import pytest
|
||||
import json
|
||||
import time
|
||||
from fastapi.testclient import TestClient
|
||||
from app.main import app
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
def test_main():
    # The health endpoint answers 200 with its fixed JSON payload.
    resp = client.get("/health")
    status, payload = resp.status_code, resp.json()
    assert status == 200
    assert payload == {"message": "Still Alive"}
|
||||
|
||||
|
||||
def test_get_empty_data():
    # An empty search term yields the "no results" message, still with 200.
    resp = client.get("/match?term=")
    status, payload = resp.status_code, resp.json()
    assert status == 200
    assert payload == "Sonuç Bulunamadı"
|
||||
|
||||
|
||||
def test_get_data():
    # A common term must come back as a non-empty list of matches.
    response = client.get("/match?term=baba")
    assert response.status_code == 200
    matches = response.json()
    # `!= []` alone would also accept the "no results" message string.
    assert isinstance(matches, list)
    assert matches
|
||||
Reference in New Issue
Block a user