Skip to content

Commit 9ce23ca

Browse files
committed
Add Sharding Estimate basic algorithm
1 parent 6b92d60 commit 9ce23ca

3 files changed

Lines changed: 43 additions & 2 deletions

File tree

controller/cluster_estimate_controller.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
from fastapi import APIRouter
33
from repository.schemas import Search
44
from injector import (logger, doc)
5-
# from injector import (logger, doc, SearchOmniHandlerInject, QueryBuilderInject)
5+
from injector import (logger,
6+
doc,
7+
SearchOmniHandlerInject,
8+
QueryBuilderInject,
9+
ClusterShardingInject)
610
import json
711
import datetime
812

@@ -26,7 +30,7 @@ async def Cluster_sharding_estimate(request: Search):
2630

2731
EndTime = datetime.datetime.now()
2832

29-
# return await SearchOmniHandlerInject.search(QueryBuilderInject, oas_query=request_json)
33+
return await ClusterShardingInject.sharding_predict(oas_query=request_json)
3034
return {'results' :
3135
{
3236
"the number of primary shards" : 1,

injector.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from config.log_config import create_log
33
from config import config
44
from service.handler.es_search_handler import (SearchOmniHandler)
5+
from service.handler.cluster_sharding import (ClusterShardingHandler)
56
from service.handler.es_query_builder import (QueryBuilder)
67
from elasticsearch import Elasticsearch
78
from dotenv import load_dotenv
@@ -45,5 +46,6 @@ def get_headers():
4546

4647
SearchOmniHandlerInject = SearchOmniHandler(es_client, logger, doc['app'])
4748
QueryBuilderInject = QueryBuilder(es_client, logger, doc['app'])
49+
ClusterShardingInject = ClusterShardingHandler(es_client, logger)
4850

4951

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
2+
import json
3+
4+
class ClusterShardingHandler(object):
    """Estimate how many shards an Elasticsearch index should be given.

    Sizing guidance this handler is meant to follow:

    * Limit the maximum shard size to 30-80 GB when running a recent
      version of Elasticsearch. A single shard can hold 100s of GB and
      still perform well. (Below version 6.0, estimate 30-50 GB.)
    * Using the 30-80 GB value, you can calculate how many shards you
      will need. For instance, if you rotate indices monthly and expect
      around 600 GB of data per month, you would allocate 8 to 20 shards.

    NOTE: the current implementation does not apply this calculation yet;
    ``sharding_predict`` returns a fixed placeholder estimate.
    """

    def __init__(self, es_client, logger):
        """Store the collaborators used by the handler.

        Args:
            es_client: Elasticsearch client object; stored but not used by
                the current placeholder estimate.
            logger: Object exposing ``info(message)``, used to log the
                estimate that is returned.
        """
        self.es_client = es_client
        self.logger = logger

    async def sharding_predict(self, oas_query=None) -> dict:
        """Return the estimated number of primary and replica shards.

        Args:
            oas_query: Request payload for the estimate. It is accepted for
                interface compatibility but is not read by the current
                placeholder implementation, so ``None`` or any other value
                yields the same result.

        Returns:
            A dict of the form ``{'results': {...}}`` holding the keys
            ``"the number of primary shards"`` and
            ``"the number of replica shards"``, both currently fixed at 1.
        """
        sharding_results = {
            'results': {
                "the number of primary shards": 1,
                "the number of replica shards": 1,
            }
        }

        # Log the pretty-printed estimate so it can be traced per request.
        self.logger.info(json.dumps(sharding_results, indent=2))

        return sharding_results
34+
35+

0 commit comments

Comments
 (0)