Skip to content

Commit f09f4fe

Browse files
committed
JSON and CSV readers are ready
0 parents  commit f09f4fe

5 files changed

Lines changed: 118 additions & 0 deletions

File tree

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.mypy*
2+
data/
3+
__pycache__/
4+
*test.py

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Data Reader
2+
===========
3+
> Python package that **lazily** reads data from large **JSON**, **XML** and **CSV** files.
4+
5+
Why not use one of the existing libraries, you ask?
6+
The main goal was deployment to a **Serverless** environment, so it was crucial to keep the package size small.
7+
8+
Installation
9+
------------
10+
pip install data-reader

data_reader/readers/csv_reader.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import os
2+
3+
4+
class CSVReader:
    """Lazily read rows from a delimited text file.

    Keys recognised in ``arguments``:

    * ``separator`` (required): string each line is split on.
    * ``skip_header`` (default ``True``): skip the first line of the file.
    * ``enclosing`` (default ``'"'``): quote string removed from fields that
      are wrapped in it; ``None`` or ``''`` disables the removal.

    Fields are produced by a plain ``str.split`` on the separator, so a
    separator that appears inside an enclosed field still splits that field.
    """

    def __init__(self, file_path, arguments=None):
        """Store the path and parsing options; the file is not opened here.

        Raises:
            ValueError: if ``arguments`` has no ``separator`` key.
        """
        self.__file_path = file_path
        # Build a fresh dict per instance instead of sharing a mutable
        # default (or aliasing the caller's dict).
        if arguments is None:
            self.__arguments = {'separator': ','}
        else:
            self.__arguments = dict(arguments)

        self.__initialize()

    def __initialize(self):
        """Validate the arguments and derive the parsing settings."""
        if 'separator' not in self.__arguments:
            raise ValueError('"separator" argument is required to parse CSV.')

        self.__separator = self.__arguments['separator']
        self.__skip_header = self.__arguments.get('skip_header', True)
        self.__enclosing = self.__arguments.get('enclosing', '"')

    def iterate(self):
        """Yield one list of string fields per line of the file."""
        with open(self.__file_path) as f:
            for index, line in enumerate(f):
                if index == 0 and self.__skip_header:
                    continue

                yield self.__parseLine(line)

    def __parseLine(self, line):
        """Split one line into a list of field strings.

        The trailing line terminator is dropped first; otherwise the last
        field would keep its newline and never match the enclosing check.
        """
        fields = line.rstrip('\r\n').split(self.__separator)

        enclosing = self.__enclosing
        if not enclosing:
            # No quote string configured: return the raw fields unchanged.
            return fields

        width = len(enclosing)
        return [
            entry[width:-width]
            if (len(entry) >= 2 * width
                and entry.startswith(enclosing)
                and entry.endswith(enclosing))
            else entry
            for entry in fields
        ]

data_reader/readers/json_reader.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import ijson
2+
import os
3+
4+
5+
class JSONReader:
    """Lazily stream objects out of a JSON file using ``ijson``.

    Keys recognised in ``arguments``:

    * ``item_node`` (default ``'item'``): the prefix handed to
      ``ijson.items`` to select which objects are yielded.
    """

    def __init__(self, file_path, arguments=None):
        """Store the path and options; the file is not opened here."""
        self.__file_path = file_path
        # A fresh dict per instance avoids a shared mutable default and
        # lets callers pass None explicitly.
        self.__arguments = {} if arguments is None else arguments

        self.__item_node = self.__arguments.get('item_node', 'item')

    def iterate(self):
        """Yield each object ``ijson`` finds under the configured prefix."""
        with open(self.__file_path, 'rb') as fh:
            yield from ijson.items(fh, self.__item_node)

setup.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import setuptools

# The README doubles as the long description shown on the package index.
# An explicit encoding keeps the read independent of the platform default.
with open('README.md', 'r', encoding='utf-8') as f:
    long_desc = f.read()

setuptools.setup(
    name='data-reader',
    version='0.0.1',
    author='Ibragim Abubakarov',
    author_email='ibragim.ai95@gmail.com',
    maintainer='Ibragim Abubakarov',
    maintainer_email='ibragim.ai95@gmail.com',
    description='Python package that allows easy importation of data from large files like CSV, JSON and XML.',
    long_description=long_desc,
    long_description_content_type='text/markdown',
    url='https://github.com/polkovnik-z/data-reader',
    # The reader modules live in the data_reader/readers sub-directory, so
    # it must be listed explicitly or they are left out of the distribution.
    packages=[
        'data_reader',
        'data_reader.readers',
    ],
    # NOTE(review): only ijson is imported by the reader modules visible in
    # this commit; confirm pandas and imperium are really needed.
    install_requires=['pandas', 'imperium', 'ijson'],
    classifiers=[
        'Programming Language :: Python :: 3',
        'Operating System :: OS Independent',
        'Intended Audience :: Developers',
    ],
)

0 commit comments

Comments
 (0)