1515
1616
1717class Configuration (BaseSettings ):
18- """Configuration of the Crawler .
18+ """Configuration settings for the Crawlee project.
1919
20- Args:
21- internal_timeout: Timeout for internal operations such as marking a request as processed.
22- verbose_log: Allows verbose logging.
23- default_storage_id: The default storage ID.
24- purge_on_start: Whether to purge the storage on start.
20+ This class stores common configurable parameters for Crawlee. Default values are provided for all settings,
21+ so typically, no adjustments are necessary. However, you may modify settings for specific use cases,
22+ such as changing the default storage directory, the default storage IDs, the timeout for internal
23+ operations, and more.
24+
25+ Settings can also be configured via environment variables, prefixed with `CRAWLEE_`.
2526 """
2627
2728 model_config = SettingsConfigDict (populate_by_name = True )
2829
2930 internal_timeout : Annotated [timedelta | None , Field (alias = 'crawlee_internal_timeout' )] = None
31+ """Timeout for the internal asynchronous operations."""
3032
3133 verbose_log : Annotated [bool , Field (alias = 'crawlee_verbose_log' )] = False
34+ """Whether to enable verbose logging."""
3235
3336 default_browser_path : Annotated [
3437 str | None ,
@@ -39,6 +42,7 @@ class Configuration(BaseSettings):
3942 )
4043 ),
4144 ] = None
45+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
4246
4347 disable_browser_sandbox : Annotated [
4448 bool ,
@@ -49,6 +53,7 @@ class Configuration(BaseSettings):
4953 )
5054 ),
5155 ] = False
56+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
5257
5358 log_level : Annotated [
5459 Literal ['DEBUG' , 'INFO' , 'WARNING' , 'ERROR' , 'CRITICAL' ],
@@ -60,6 +65,7 @@ class Configuration(BaseSettings):
6065 ),
6166 BeforeValidator (lambda value : str (value ).upper ()),
6267 ] = 'INFO'
68+ """The logging level."""
6369
6470 default_dataset_id : Annotated [
6571 str ,
@@ -71,6 +77,7 @@ class Configuration(BaseSettings):
7177 )
7278 ),
7379 ] = 'default'
80+ """The default dataset ID."""
7481
7582 default_key_value_store_id : Annotated [
7683 str ,
@@ -82,6 +89,7 @@ class Configuration(BaseSettings):
8289 )
8390 ),
8491 ] = 'default'
92+ """The default key-value store ID."""
8593
8694 default_request_queue_id : Annotated [
8795 str ,
@@ -93,6 +101,7 @@ class Configuration(BaseSettings):
93101 )
94102 ),
95103 ] = 'default'
104+ """The default request queue ID."""
96105
97106 purge_on_start : Annotated [
98107 bool ,
@@ -103,8 +112,10 @@ class Configuration(BaseSettings):
103112 )
104113 ),
105114 ] = True
115+ """Whether to purge the storage on start."""
106116
107117 write_metadata : Annotated [bool , Field (alias = 'crawlee_write_metadata' )] = True
118+ """Whether to write the storage metadata."""
108119
109120 persist_storage : Annotated [
110121 bool ,
@@ -115,6 +126,7 @@ class Configuration(BaseSettings):
115126 )
116127 ),
117128 ] = True
129+ """Whether to persist the storage."""
118130
119131 persist_state_interval : Annotated [
120132 timedelta_ms ,
@@ -125,6 +137,7 @@ class Configuration(BaseSettings):
125137 )
126138 ),
127139 ] = timedelta (minutes = 1 )
140+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
128141
129142 system_info_interval : Annotated [
130143 timedelta_ms ,
@@ -135,6 +148,7 @@ class Configuration(BaseSettings):
135148 )
136149 ),
137150 ] = timedelta (seconds = 1 )
151+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
138152
139153 max_used_cpu_ratio : Annotated [
140154 float ,
@@ -145,6 +159,7 @@ class Configuration(BaseSettings):
145159 )
146160 ),
147161 ] = 0.95
162+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
148163
149164 memory_mbytes : Annotated [
150165 int | None ,
@@ -156,6 +171,7 @@ class Configuration(BaseSettings):
156171 )
157172 ),
158173 ] = None
174+ """The maximum memory in megabytes. The `Snapshotter.max_memory_size` is set to this value."""
159175
160176 available_memory_ratio : Annotated [
161177 float ,
@@ -166,6 +182,7 @@ class Configuration(BaseSettings):
166182 )
167183 ),
168184 ] = 0.25
185+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
169186
170187 storage_dir : Annotated [
171188 str ,
@@ -176,6 +193,7 @@ class Configuration(BaseSettings):
176193 ),
177194 ),
178195 ] = './storage'
196+ """The path to the storage directory."""
179197
180198 chrome_executable_path : Annotated [
181199 str | None ,
@@ -186,6 +204,7 @@ class Configuration(BaseSettings):
186204 )
187205 ),
188206 ] = None
207+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
189208
190209 headless : Annotated [
191210 bool ,
@@ -196,6 +215,7 @@ class Configuration(BaseSettings):
196215 )
197216 ),
198217 ] = True
218+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
199219
200220 xvfb : Annotated [
201221 bool ,
@@ -206,6 +226,7 @@ class Configuration(BaseSettings):
206226 )
207227 ),
208228 ] = False
229+ """This setting is currently unused. For more details, see https://github.com/apify/crawlee-python/issues/670."""
209230
210231 @classmethod
211232 def get_global_configuration (cls ) -> Self :
0 commit comments