|
88 | 88 | "source": [ |
89 | 89 | "from time import sleep\n", |
90 | 90 | "\n", |
| 91 | + "\n", |
91 | 92 | "def inc(x):\n", |
92 | 93 | " sleep(1)\n", |
93 | 94 | " return x + 1\n", |
94 | 95 | "\n", |
| 96 | + "\n", |
95 | 97 | "def add(x, y):\n", |
96 | 98 | " sleep(1)\n", |
97 | 99 | " return x + y" |
|
139 | 141 | "source": [ |
140 | 142 | "import dask\n", |
141 | 143 | "\n", |
| 144 | + "\n", |
142 | 145 | "@dask.delayed\n", |
143 | 146 | "def inc(x):\n", |
144 | 147 | " sleep(1)\n", |
145 | 148 | " return x + 1\n", |
146 | 149 | "\n", |
| 150 | + "\n", |
147 | 151 | "@dask.delayed\n", |
148 | 152 | "def add(x, y):\n", |
149 | 153 | " sleep(1)\n", |
|
258 | 262 | "%%time\n", |
259 | 263 | "# Sequential code\n", |
260 | 264 | "\n", |
| 265 | + "\n", |
261 | 266 | "def inc(x):\n", |
262 | 267 | " sleep(1)\n", |
263 | 268 | " return x + 1\n", |
264 | 269 | "\n", |
| 270 | + "\n", |
265 | 271 | "results = []\n", |
266 | 272 | "for x in data:\n", |
267 | 273 | " y = inc(x)\n", |
268 | 274 | " results.append(y)\n", |
269 | | - " \n", |
| 275 | + "\n", |
270 | 276 | "total = sum(results)" |
271 | 277 | ] |
272 | 278 | }, |
|
305 | 311 | " sleep(1)\n", |
306 | 312 | " return x + 1\n", |
307 | 313 | "\n", |
| 314 | + "\n", |
308 | 315 | "results = []\n", |
309 | 316 | "for x in data:\n", |
310 | 317 | " y = inc(x)\n", |
311 | 318 | " results.append(y)\n", |
312 | | - " \n", |
| 319 | + "\n", |
313 | 320 | "total = sum(results)\n", |
314 | 321 | "print(\"Before computing:\", total) # Let's see what type of thing total is\n", |
315 | 322 | "result = total.compute()\n", |
|
347 | 354 | " sleep(1)\n", |
348 | 355 | " return 2 * x\n", |
349 | 356 | "\n", |
| 357 | + "\n", |
350 | 358 | "def is_even(x):\n", |
351 | 359 | " return not x % 2\n", |
352 | 360 | "\n", |
| 361 | + "\n", |
353 | 362 | "data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]" |
354 | 363 | ] |
355 | 364 | }, |
|
369 | 378 | " else:\n", |
370 | 379 | " y = inc(x)\n", |
371 | 380 | " results.append(y)\n", |
372 | | - " \n", |
| 381 | + "\n", |
373 | 382 | "total = sum(results)\n", |
374 | 383 | "print(total)" |
375 | 384 | ] |
|
402 | 411 | " sleep(1)\n", |
403 | 412 | " return 2 * x\n", |
404 | 413 | "\n", |
| 414 | + "\n", |
405 | 415 | "results = []\n", |
406 | 416 | "for x in data:\n", |
407 | 417 | " if is_even(x): # even\n", |
408 | 418 | " y = double(x)\n", |
409 | | - " else: # odd\n", |
| 419 | + " else: # odd\n", |
410 | 420 | " y = inc(x)\n", |
411 | 421 | " results.append(y)\n", |
412 | | - " \n", |
| 422 | + "\n", |
413 | 423 | "total = sum(results)" |
414 | 424 | ] |
415 | 425 | }, |
|
487 | 497 | "outputs": [], |
488 | 498 | "source": [ |
489 | 499 | "import os\n", |
490 | | - "sorted(os.listdir(os.path.join('data', 'nycflights')))" |
| 500 | + "\n", |
| 501 | + "sorted(os.listdir(os.path.join(\"data\", \"nycflights\")))" |
491 | 502 | ] |
492 | 503 | }, |
493 | 504 | { |
|
504 | 515 | "outputs": [], |
505 | 516 | "source": [ |
506 | 517 | "import pandas as pd\n", |
507 | | - "df = pd.read_csv(os.path.join('data', 'nycflights', '1990.csv'))\n", |
| 518 | + "\n", |
| 519 | + "df = pd.read_csv(os.path.join(\"data\", \"nycflights\", \"1990.csv\"))\n", |
508 | 520 | "df.head()" |
509 | 521 | ] |
510 | 522 | }, |
|
535 | 547 | "outputs": [], |
536 | 548 | "source": [ |
537 | 549 | "# Mean departure delay per-airport for one year\n", |
538 | | - "df.groupby('Origin').DepDelay.mean()" |
| 550 | + "df.groupby(\"Origin\").DepDelay.mean()" |
539 | 551 | ] |
540 | 552 | }, |
541 | 553 | { |
|
554 | 566 | "outputs": [], |
555 | 567 | "source": [ |
556 | 568 | "from glob import glob\n", |
557 | | - "filenames = sorted(glob(os.path.join('data', 'nycflights', '*.csv')))" |
| 569 | + "\n", |
| 570 | + "filenames = sorted(glob(os.path.join(\"data\", \"nycflights\", \"*.csv\")))" |
558 | 571 | ] |
559 | 572 | }, |
560 | 573 | { |
|
570 | 583 | "for fn in filenames:\n", |
571 | 584 | " # Read in file\n", |
572 | 585 | " df = pd.read_csv(fn)\n", |
573 | | - " \n", |
| 586 | + "\n", |
574 | 587 | " # Groupby origin airport\n", |
575 | | - " by_origin = df.groupby('Origin')\n", |
576 | | - " \n", |
| 588 | + " by_origin = df.groupby(\"Origin\")\n", |
| 589 | + "\n", |
577 | 590 | " # Sum of all departure delays by origin\n", |
578 | 591 | " total = by_origin.DepDelay.sum()\n", |
579 | | - " \n", |
| 592 | + "\n", |
580 | 593 | " # Number of flights by origin\n", |
581 | 594 | " count = by_origin.DepDelay.count()\n", |
582 | | - " \n", |
| 595 | + "\n", |
583 | 596 | " # Save the intermediates\n", |
584 | 597 | " sums.append(total)\n", |
585 | 598 | " counts.append(count)\n", |
|
647 | 660 | "# This is just one possible solution, there are\n", |
648 | 661 | "# several ways to do this using `dask.delayed`\n", |
649 | 662 | "\n", |
| 663 | + "\n", |
650 | 664 | "@dask.delayed\n", |
651 | 665 | "def read_file(filename):\n", |
652 | 666 | " # Read in file\n", |
653 | 667 | " return pd.read_csv(filename)\n", |
654 | 668 | "\n", |
| 669 | + "\n", |
655 | 670 | "sums = []\n", |
656 | 671 | "counts = []\n", |
657 | 672 | "for fn in filenames:\n", |
658 | 673 | " # Delayed read in file\n", |
659 | 674 | " df = read_file(fn)\n", |
660 | 675 | "\n", |
661 | 676 | " # Groupby origin airport\n", |
662 | | - " by_origin = df.groupby('Origin')\n", |
| 677 | + " by_origin = df.groupby(\"Origin\")\n", |
663 | 678 | "\n", |
664 | 679 | " # Sum of all departure delays by origin\n", |
665 | 680 | " total = by_origin.DepDelay.sum()\n", |
666 | 681 | "\n", |
667 | 682 | " # Number of flights by origin\n", |
668 | 683 | " count = by_origin.DepDelay.count()\n", |
669 | | - " \n", |
| 684 | + "\n", |
670 | 685 | " # Save the intermediates\n", |
671 | 686 | " sums.append(total)\n", |
672 | 687 | " counts.append(count)\n", |
|
0 commit comments