Skip to content

Commit 9a0725b

Browse files
authored
chore: Fix new dir structure-related issues (#854)
1 parent 2e0fbff commit 9a0725b

15 files changed

Lines changed: 27 additions & 28 deletions

File tree

docs/upgrading/upgrading_to_v0x.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,16 @@ Example update:
4949
### Service locator
5050

5151
- The `crawlee.service_container` was completely refactored and renamed to `crawlee.service_locator`.
52+
- You can use it to set the configuration, event manager or storage client globally. Alternatively, you can pass them to your crawler instance directly, and it will use the service locator under the hood.
5253

5354
### Statistics
5455

5556
- The `crawlee.statistics.Statistics` class does not accept an event manager as an input argument anymore. It uses the default, global one.
57+
- If you want to set your custom event manager, do it either via the service locator or pass it to the crawler.
5658

5759
### Request
5860

59-
- Removed properties `json_` and `order_no`.
61+
- The properties `json_` and `order_no` were removed. They were there only for the internal purpose of the memory storage client; you should not need them.
6062

6163
### Request storages and loaders
6264

src/crawlee/browsers/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from ._playwright_browser_plugin import PlaywrightBrowserPlugin
55
except ImportError as exc:
66
raise ImportError(
7-
"To import anything from this subpackage, you need to install the 'playwright' extra. "
7+
"To import this, you need to install the 'playwright' extra. "
88
"For example, if you use pip, run `pip install 'crawlee[playwright]'`.",
99
) from exc
1010

src/crawlee/crawlers/_abstract_http/_abstract_http_crawler.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,23 @@ class HttpCrawlerOptions(Generic[TCrawlingContext], BasicCrawlerOptions[TCrawlin
4040
"""Additional HTTP status codes to treat as errors, triggering automatic retries when encountered."""
4141

4242
ignore_http_error_status_codes: NotRequired[Iterable[int]]
43-
"""HTTP status codes typically considered errors but to be treated as successful responses."""
43+
"""HTTP status codes that are typically considered errors but should be treated as successful responses."""
4444

4545

4646
@docs_group('Abstract classes')
4747
class AbstractHttpCrawler(Generic[TCrawlingContext, TParseResult], BasicCrawler[TCrawlingContext], ABC):
4848
"""A web crawler for performing HTTP requests.
4949
50-
The `AbstractHttpCrawler` builds on top of the `BasicCrawler`, which means it inherits all of its features. On top
51-
of that it implements the HTTP communication using the HTTP clients. The class allows integration with
52-
any HTTP client that implements the `BaseHttpClient` interface. The HTTP client is provided to the crawler
53-
as an input parameter to the constructor.
54-
AbstractHttpCrawler is generic class and is expected to be used together with specific parser that will be used to
55-
parse http response and type of expected TCrawlingContext which is available to the user function.
56-
See prepared specific version of it: BeautifulSoupCrawler, ParselCrawler or HttpCrawler for example.
50+
The `AbstractHttpCrawler` builds on top of the `BasicCrawler`, inheriting all its features. Additionally,
51+
it implements HTTP communication using HTTP clients. The class allows integration with any HTTP client
52+
that implements the `BaseHttpClient` interface, provided as an input parameter to the constructor.
5753
58-
The HTTP client-based crawlers are ideal for websites that do not require JavaScript execution. However,
59-
if you need to execute client-side JavaScript, consider using a browser-based crawler like the `PlaywrightCrawler`.
54+
`AbstractHttpCrawler` is a generic class intended to be used with a specific parser for parsing HTTP responses
55+
and the expected type of `TCrawlingContext` available to the user function. Examples of specific versions include
56+
`BeautifulSoupCrawler`, `ParselCrawler`, and `HttpCrawler`.
57+
58+
HTTP client-based crawlers are ideal for websites that do not require JavaScript execution. For websites that
59+
require client-side JavaScript execution, consider using a browser-based crawler like the `PlaywrightCrawler`.
6060
"""
6161

6262
def __init__(

src/crawlee/crawlers/_beautifulsoup/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from ._beautifulsoup_parser import BeautifulSoupParserType
55
except ImportError as exc:
66
raise ImportError(
7-
"To import anything from this subpackage, you need to install the 'beautifulsoup' extra. "
7+
"To import this, you need to install the 'beautifulsoup' extra. "
88
"For example, if you use pip, run `pip install 'crawlee[beautifulsoup]'`.",
99
) from exc
1010

src/crawlee/crawlers/_beautifulsoup/_beautifulsoup_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class BeautifulSoupCrawler(AbstractHttpCrawler[BeautifulSoupCrawlingContext, Bea
3232
### Usage
3333
3434
```python
35-
from crawlee.beautifulsoup_crawler import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
35+
from crawlee.crawlers import BeautifulSoupCrawler, BeautifulSoupCrawlingContext
3636
3737
crawler = BeautifulSoupCrawler()
3838

src/crawlee/crawlers/_http/_http_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ class HttpCrawler(AbstractHttpCrawler[ParsedHttpCrawlingContext[bytes], bytes]):
2222
### Usage
2323
2424
```python
25-
from crawlee.http_crawler import HttpCrawler, HttpCrawlingContext
25+
from crawlee.crawlers import HttpCrawler, HttpCrawlingContext
2626
2727
crawler = HttpCrawler()
2828

src/crawlee/crawlers/_parsel/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from ._parsel_crawling_context import ParselCrawlingContext
44
except ImportError as exc:
55
raise ImportError(
6-
"To import anything from this subpackage, you need to install the 'parsel' extra. "
6+
"To import this, you need to install the 'parsel' extra. "
77
"For example, if you use pip, run `pip install 'crawlee[parsel]'`.",
88
) from exc
99

src/crawlee/crawlers/_parsel/_parsel_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ class ParselCrawler(AbstractHttpCrawler[ParselCrawlingContext, Selector]):
3232
### Usage
3333
3434
```python
35-
from crawlee.parsel_crawler import ParselCrawler, ParselCrawlingContext
35+
from crawlee.crawlers import ParselCrawler, ParselCrawlingContext
3636
3737
crawler = ParselCrawler()
3838

src/crawlee/crawlers/_playwright/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from ._playwright_pre_nav_crawling_context import PlaywrightPreNavCrawlingContext
55
except ImportError as exc:
66
raise ImportError(
7-
"To import anything from this subpackage, you need to install the 'playwright' extra. "
7+
"To import this, you need to install the 'playwright' extra. "
88
"For example, if you use pip, run `pip install 'crawlee[playwright]'`.",
99
) from exc
1010

src/crawlee/crawlers/_playwright/_playwright_crawler.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext]):
4545
### Usage
4646
4747
```python
48-
from crawlee.playwright_crawler import PlaywrightCrawler, PlaywrightCrawlingContext
48+
from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext
4949
5050
crawler = PlaywrightCrawler()
5151

0 commit comments

Comments
 (0)