# Comma-separated field names. Not referenced by the code visible in this
# file; presumably meant for the {1} slot of BOOK_ENDPOINT -- TODO confirm.
DEFAULT_FIELDS = "Title,Author,ID,MD5"

# Default base URL of LibgenFictionClient; the search path is appended to it.
LIBGEN_URL = "http://libgen.io/foreignfiction/"

# Paths relative to the base URL. Only SEARCH_URL is used by the visible
# code; the two templates take positional str.format() arguments.
BOOK_ENDPOINT = "json.php?ids={0}&fields={1}"
DOWNLOAD_URL = "get.php?md5={0}"
SEARCH_URL = "index.php"

# Raw string: "\?" is a regex escape, not a valid string escape, and a
# non-raw literal triggers an invalid-escape-sequence warning.
ID_REGEX = r"\?id=[0-9]+"
10+
11+ import requests
12+ import json
13+ from collections import namedtuple
14+ from lxml import etree
15+
16+ import re
17+
18+
def _json_object_hook(d):
    """Turn one decoded JSON object (a dict) into a namedtuple called ``X``.

    ``rename=True`` keeps keys that are not valid Python identifiers
    (``"first-name"``, ``"class"``, ``"_private"``, ...) from raising
    ``ValueError``: such fields get positional names (``_0``, ``_1``, ...)
    and stay reachable by index.
    """
    record_type = namedtuple('X', d.keys(), rename=True)
    return record_type(*d.values())


def json2obj(data):
    """Parse the JSON text ``data``, returning objects as namedtuples.

    Every JSON object, at any nesting depth, is converted by
    ``_json_object_hook``; arrays and scalars are returned as ``json.loads``
    produces them.
    """
    return json.loads(data, object_hook=_json_object_hook)
21+
def xpath(node, path):
    """Evaluate ``path`` underneath ``node`` and return the matches.

    The absolute path of ``node`` (as reported by its root tree's
    ``getpath``) is prefixed to ``path``, and the combined expression is
    evaluated on that root tree.
    """
    root_tree = node.getroottree()
    absolute_expression = root_tree.getpath(node) + path
    return root_tree.xpath(absolute_expression)
27+
class LibgenDownload:
    """One download link: its target URL plus the format/size read from its text."""

    # Expected link text: <format>(<size><unit>), e.g. something like
    # "EPUB(123Kb)". Written as a raw string so "\(" is a regex escape, and
    # with explicit A-Za-z ranges: "[A-z]" would also accept the punctuation
    # that sits between "Z" and "a" in ASCII ([ \ ] ^ _ `).
    _DOWNLOAD_RE = re.compile(r"([A-Za-z0-9]+)\(([0-9]+)([A-Za-z]+)\)")

    def __init__(self, url, format, size, unit):
        """Store the link target and the three parts of the link text.

        ``size`` is kept exactly as captured (a string of digits when the
        instance comes from ``parse``); no numeric conversion is done.
        """
        self.url = url
        self.format = format
        self.size = size
        self.unit = unit

    def __repr__(self):
        return "LibgenDownload(url=%r, format=%r, size=%r, unit=%r)" % (
            self.url, self.format, self.size, self.unit)

    @staticmethod
    def parse(node):
        """Build a LibgenDownload from a link element.

        ``node`` must expose ``.text`` and ``.get('href')``. Returns ``None``
        when the element has no text or its text does not start with the
        ``<format>(<size><unit>)`` pattern.
        """
        text = node.text
        # An element without direct text reports None, which re cannot match.
        if text is None:
            return None

        match = LibgenDownload._DOWNLOAD_RE.match(text)
        if not match:
            return None

        file_format, size, unit = match.groups()
        return LibgenDownload(node.get('href'), file_format, size, unit)
52+
class LibgenBook:
    """A book read from one table row: title, author, series, language, downloads."""

    def __init__(self, title, author, series, downloads, language):
        """Store the given values as attributes of the same names."""
        self.title = title
        self.author = author
        self.series = series
        self.downloads = downloads
        self.language = language

    def __repr__(self):
        return "LibgenBook(title=%r, author=%r, series=%r, language=%r, downloads=%r)" % (
            self.title, self.author, self.series, self.language, self.downloads)

    @staticmethod
    def parse(node):
        """Build a LibgenBook from a table-row element.

        Cells are located relative to ``node``: author link in cell 1,
        series in cell 2, title in cell 3, language in cell 4 and download
        links in cell 5.

        Returns ``None`` when the row has no third cell (so it cannot be a
        book row in this layout, e.g. a header row) instead of raising
        ``IndexError``. Any other missing cell yields ``None`` for that
        field. Links whose text is not a recognised download description are
        left out of ``downloads``.
        """
        AUTHOR_XPATH = '/td[1]/a'
        SERIES_XPATH = '/td[2]'
        TITLE_XPATH = '/td[3]'
        LANGUAGE_XPATH = '/td[4]'
        DOWNLOADS_XPATH = '/td[5]/div/a[1]'

        title_cells = xpath(node, TITLE_XPATH)
        if not title_cells:
            return None

        def first_text(path):
            # Text of the first match, or None when nothing matches.
            found = xpath(node, path)
            return found[0].text if found else None

        title = title_cells[0].text
        author = first_text(AUTHOR_XPATH)
        series = first_text(SERIES_XPATH)
        language = first_text(LANGUAGE_XPATH)

        # LibgenDownload.parse returns None for links it cannot interpret;
        # keep only real downloads so callers never meet a None entry.
        candidates = (LibgenDownload.parse(n) for n in xpath(node, DOWNLOADS_XPATH))
        downloads = [d for d in candidates if d is not None]

        return LibgenBook(title, author, series, downloads, language)
78+
class LibgenSearchResults:
    """The books found on one search-result page."""

    def __init__(self, results, total):
        """Store the list of books and the reported total."""
        self.results = results
        self.total = total

    @staticmethod
    def parse(node):
        """Build search results from the parsed page rooted at ``node``.

        Every ``tr`` under the second table (``//table[2]//tr`` below
        ``node``) is handed to ``LibgenBook.parse``; rows for which it yields
        ``None`` are dropped so ``results`` only ever holds book objects.
        """
        SEARCH_ROW_SELECTOR = "//table[2]//tr"

        rows = xpath(node, SEARCH_ROW_SELECTOR)
        parsed_rows = (LibgenBook.parse(row) for row in rows)
        results = [book for book in parsed_rows if book is not None]

        # Placeholder: the overall hit count is not extracted from the page,
        # so ``total`` is always 0 here (it is NOT len(results)).
        total = 0

        return LibgenSearchResults(results, total)
94+
class LibgenFictionClient:
    """HTTP client that runs searches against the site under ``base_url``."""

    def __init__(self, base_url=LIBGEN_URL):
        """Remember the base URL that endpoint paths are appended to."""
        self.base_url = base_url

    def search(self, query, timeout=30):
        """Search for ``query`` and return the parsed LibgenSearchResults.

        Args:
            query: search text, sent as the ``s`` query parameter.
            timeout: seconds passed to ``requests.get``; without one a
                stalled server would block the call indefinitely.

        Raises:
            requests.exceptions.RequestException: on connection problems,
                timeouts, or (via ``raise_for_status``) HTTP error statuses,
                so an error page is never parsed as if it were results.
        """
        # Accept a base URL given with or without its trailing slash.
        url = self.base_url.rstrip('/') + '/' + SEARCH_URL
        query_params = {
            's': query,
            'f_group': 1,
        }

        response = requests.get(url, params=query_params, timeout=timeout)
        response.raise_for_status()

        parser = etree.HTMLParser()
        tree = etree.fromstring(response.text, parser)

        return LibgenSearchResults.parse(tree)
112+
113+
if __name__ == "__main__":
    # Manual smoke run: performs one live search against the default site.
    # The class defined in this module is LibgenFictionClient; the previous
    # name, LibgenClient, does not exist and raised NameError.
    client = LibgenFictionClient()
    result = client.search("Stormlight Archive")
0 commit comments