Skip to content

Commit c174ff6

Browse files
Adds a simple LibGen Fiction client
0 parents  commit c174ff6

1 file changed

Lines changed: 116 additions & 0 deletions

File tree

libgen_client.py

Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
# Comma-separated field names; presumably the value for the ``fields``
# slot of BOOK_ENDPOINT -- TODO confirm, no visible code uses it yet.
DEFAULT_FIELDS = "Title,Author,ID,MD5"

# Default ``base_url`` of LibgenFictionClient; endpoint names below are
# appended to it.
LIBGEN_URL = "http://libgen.io/foreignfiction/"

# ``str.format``-style templates ({0}, {1} are positional placeholders).
BOOK_ENDPOINT = "json.php?ids={0}&fields={1}"
DOWNLOAD_URL = "get.php?md5={0}"
# Appended to the base URL by LibgenFictionClient.search.
SEARCH_URL = "index.php"

# Raw string so that ``\?`` reaches the regex engine as an escaped literal
# question mark instead of being an invalid string escape sequence.
ID_REGEX = r"\?id=[0-9]+"
10+
11+
import requests
12+
import json
13+
from collections import namedtuple
14+
from lxml import etree
15+
16+
import re
17+
18+
19+
def _json_object_hook(d):
    """Turn one decoded JSON object (a dict) into a namedtuple called ``X``.

    Fields follow the dict's key order and hold the corresponding values.
    """
    # rename=True: keys that are not valid Python identifiers (e.g.
    # "first-name", "class", "_x") become positional names (_0, _1, ...)
    # instead of making namedtuple() raise ValueError.
    return namedtuple('X', d.keys(), rename=True)(*d.values())


def json2obj(data):
    """Parse the JSON document ``data`` into attribute-accessible objects.

    Every JSON object, at any nesting depth, is converted by
    ``_json_object_hook``; other JSON values decode as ``json.loads``
    normally decodes them.
    """
    return json.loads(data, object_hook=_json_object_hook)
21+
22+
def xpath(node, path):
    """Evaluate ``path`` as a continuation of ``node``'s own location.

    The absolute path of ``node`` (as reported by its root tree's
    ``getpath``) is concatenated with ``path`` and the combined expression
    is evaluated against the root tree; that result is returned as-is.
    """
    document = node.getroottree()
    absolute_expr = document.getpath(node) + path
    return document.xpath(absolute_expr)
27+
28+
class LibgenDownload:
    """A download link described by its URL, file format, size and unit.

    ``size`` is kept as the digit string captured from the link text; it is
    not converted to a number.
    """

    # Link text shaped like ``EPUB(123Kb)``: format, digits, unit.
    # Raw string so ``\(`` is a regex escape rather than an invalid string
    # escape; explicit A-Za-z because ``A-z`` also spans ``[ \ ] ^ _`` and
    # the backtick.
    _LINK_TEXT = re.compile(r"([A-Za-z0-9]+)\(([0-9]+)([A-Za-z]+)\)")

    def __init__(self, url, format, size, unit):
        self.url = url
        self.format = format
        self.size = size
        self.unit = unit

    @staticmethod
    def parse(node):
        """Build a LibgenDownload from a link element.

        ``node`` must expose ``.text`` and ``.get('href')``. Returns
        ``None`` when the element has no text or the text does not start
        with the ``FORMAT(SIZEunit)`` shape.
        """
        text = node.text
        if text is None:
            # An element without text cannot describe a download.
            return None

        match = LibgenDownload._LINK_TEXT.match(text)
        if match is None:
            return None

        file_format, size, unit = match.groups()
        return LibgenDownload(node.get('href'), file_format, size, unit)
52+
53+
class LibgenBook:
    """One book entry: title, author, series, language and download links."""

    def __init__(self, title, author, series, downloads, language):
        self.title = title
        self.author = author
        self.series = series
        self.downloads = downloads
        self.language = language

    @staticmethod
    def _first_text(node, path):
        """Return the text of the first match of ``path`` under ``node``.

        Returns ``None`` when nothing matches, so a row that lacks a cell
        does not abort parsing with an IndexError.
        """
        matches = xpath(node, path)
        return matches[0].text if matches else None

    @staticmethod
    def parse(node):
        """Build a LibgenBook from a table-row element.

        Author, series, title and language are read from cells 1-4 of the
        row (the author from a link inside cell 1); download links come
        from cell 5. A missing cell yields ``None`` for that attribute, and
        links that ``LibgenDownload.parse`` rejects are left out of
        ``downloads``.
        """
        AUTHOR_XPATH = '/td[1]/a'
        SERIES_XPATH = '/td[2]'
        TITLE_XPATH = '/td[3]'
        LANGUAGE_XPATH = '/td[4]'
        DOWNLOADS_XPATH = '/td[5]/div/a[1]'

        author = LibgenBook._first_text(node, AUTHOR_XPATH)
        series = LibgenBook._first_text(node, SERIES_XPATH)
        title = LibgenBook._first_text(node, TITLE_XPATH)
        language = LibgenBook._first_text(node, LANGUAGE_XPATH)

        # LibgenDownload.parse returns None for links it cannot interpret;
        # keep only real downloads so consumers never meet a None entry.
        candidates = (
            LibgenDownload.parse(link)
            for link in xpath(node, DOWNLOADS_XPATH)
        )
        downloads = [download for download in candidates if download is not None]

        return LibgenBook(title, author, series, downloads, language)
78+
79+
class LibgenSearchResults:
    """Container for the books found by a search plus a ``total`` count."""

    def __init__(self, results, total):
        self.results, self.total = results, total

    @staticmethod
    def parse(node):
        """Build a LibgenSearchResults from a parsed page.

        Every ``tr`` under the document's second ``table`` position is
        handed to ``LibgenBook.parse``, in document order.
        """
        SEARCH_ROW_SELECTOR = "//table[2]//tr"

        books = []
        for row in xpath(node, SEARCH_ROW_SELECTOR):
            books.append(LibgenBook.parse(row))

        # NOTE(review): total is hard-coded to 0 and never read from the
        # page -- looks like a placeholder; confirm before relying on it.
        return LibgenSearchResults(books, 0)
94+
95+
class LibgenFictionClient:
    """HTTP client for the LibGen fiction search page."""

    def __init__(self, base_url=LIBGEN_URL):
        # Endpoint names are appended directly to base_url, so it should
        # end with a slash (as the default does).
        self.base_url = base_url

    def search(self, query, timeout=30):
        """Search for ``query`` and return the parsed LibgenSearchResults.

        Args:
            query: Search text, sent as the ``s`` query parameter.
            timeout: Seconds passed to ``requests.get``; without a timeout
                a stalled server would block the caller indefinitely.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.RequestException: On connection failure or timeout.
        """
        url = self.base_url + SEARCH_URL
        query_params = {
            's': query,
            'f_group': 1,
        }

        response = requests.get(url, params=query_params, timeout=timeout)
        # Fail loudly on 4xx/5xx rather than parsing an error page as if it
        # were a result listing.
        response.raise_for_status()

        parser = etree.HTMLParser()
        tree = etree.fromstring(response.text, parser)

        return LibgenSearchResults.parse(tree)
112+
113+
114+
if __name__ == "__main__":
    # Manual smoke run: perform one search against the default base URL.
    # The client class defined in this file is LibgenFictionClient; the
    # previous name, LibgenClient, does not exist and raised NameError.
    client = LibgenFictionClient()
    result = client.search("Stormlight Archive")

0 commit comments

Comments
 (0)