|
272 | 272 | } |
273 | 273 | ], |
274 | 274 | "source": [ |
275 | | - "df_titanic = pd.read_csv('titanic.csv')\n", |
276 | | - "df_titanic.head()" |
| 275 | + "# df_titanic = pd.read_csv('titanic.csv')\n", |
| 276 | + "# df_titanic.head()" |
277 | 277 | ] |
278 | 278 | }, |
279 | 279 | { |
|
328 | 328 | } |
329 | 329 | ], |
330 | 330 | "source": [ |
331 | | - "df_titanic.info()" |
| 331 | + "# df_titanic.info()" |
332 | 332 | ] |
333 | 333 | }, |
334 | 334 | { |
|
360 | 360 | } |
361 | 361 | ], |
362 | 362 | "source": [ |
363 | | - "#Další způsob, jak zobrazit počet chybějících hodnot v každém sloupci\n", |
364 | | - "df_titanic.isnull().sum()" |
| 363 | + "# Další způsob, jak zobrazit počet chybějících hodnot v každém sloupci\n", |
| 364 | + "#df_titanic.isnull().sum()" |
365 | 365 | ] |
366 | 366 | }, |
367 | 367 | { |
|
524 | 524 | ], |
525 | 525 | "source": [ |
526 | 526 | "# uklidit Fare, převést na float\n", |
527 | | - "df_titanic.Fare = df_titanic.Fare.str.replace('$','')\n", |
528 | | - "df_titanic['Fare'] = df_titanic.Fare.astype(float)\n", |
529 | | - "df_titanic.head()" |
| 527 | + "\n", |
| 528 | + "# df_titanic.Fare = df_titanic.Fare.str.replace('$','')\n", |
| 529 | + "# df_titanic['Fare'] = df_titanic.Fare.astype(float)\n", |
| 530 | + "# df_titanic.head()" |
530 | 531 | ] |
531 | 532 | }, |
532 | 533 | { |
|
692 | 693 | } |
693 | 694 | ], |
694 | 695 | "source": [ |
695 | | - "df_titanic.describe()" |
| 696 | + "# df_titanic.describe()" |
696 | 697 | ] |
697 | 698 | }, |
698 | 699 | { |
|
754 | 755 | ], |
755 | 756 | "source": [ |
756 | 757 | "# Kolik jich přežilo\n", |
757 | | - "f,ax=plt.subplots(figsize=(5,5))\n", |
758 | | - "sns.countplot(x='Survived',data=df_titanic, ax = ax)\n", |
759 | | - "ax.set_title('Zahynulí vs. přežili')\n", |
760 | | - "# Není nutné, jen k odstranění jakéhokoli výstupu\n", |
761 | | - "plt.show()" |
| 758 | + "\n", |
| 759 | + "# f,ax=plt.subplots(figsize=(5,5))\n", |
| 760 | + "# sns.countplot(x='Survived',data=df_titanic, ax = ax)\n", |
| 761 | + "# ax.set_title('Zahynulí vs. přežili')\n", |
| 762 | + "\n", |
| 763 | + "# plt.show()" |
762 | 764 | ] |
763 | 765 | }, |
764 | 766 | { |
|
781 | 783 | ], |
782 | 784 | "source": [ |
783 | 785 | "# počet přeživších\n", |
784 | | - "f,ax=plt.subplots(figsize=(5,5))\n", |
785 | | - "přeživší_počet = df_titanic.Survived.value_counts()\n", |
786 | | - "přeživší_počet.plot.bar(ax=ax)\n", |
787 | | - "ax.set_title('Zahynulí vs. přežili')\n", |
788 | | - "plt.show()" |
| 786 | + "\n", |
| 787 | + "# f,ax=plt.subplots(figsize=(5,5))\n", |
| 788 | + "# přeživší_počet = df_titanic.Survived.value_counts()\n", |
| 789 | + "# přeživší_počet.plot.bar(ax=ax)\n", |
| 790 | + "# ax.set_title('Zahynulí vs. přežili')\n", |
| 791 | + "# plt.show()" |
789 | 792 | ] |
790 | 793 | }, |
791 | 794 | { |
|
808 | 811 | ], |
809 | 812 | "source": [ |
810 | 813 | "# Procentuální podíl přeživších (survivorCount)\n", |
811 | | - "f,ax=plt.subplots(figsize=(5,5))\n", |
812 | | - "survivorCount = df_titanic.Survived.value_counts(normalize=True)\n", |
813 | | - "survivorCount.plot.bar(ax=ax)\n", |
814 | | - "ax.set_title('Zahynulí vs. přežili')\n", |
815 | | - "ax.set_xticklabels( ['Zhynulo', 'Přežilo'], rotation=0)\n", |
816 | | - "plt.show()" |
| 814 | + "\n", |
| 815 | + "# f,ax=plt.subplots(figsize=(5,5))\n", |
| 816 | + "# survivorCount = df_titanic.Survived.value_counts(normalize=True)\n", |
| 817 | + "# survivorCount.plot.bar(ax=ax)\n", |
| 818 | + "# ax.set_title('Zahynulí vs. přežili')\n", |
| 819 | + "# ax.set_xticklabels( ['Zhynulo', 'Přežilo'], rotation=0)\n", |
| 820 | + "# plt.show()" |
817 | 821 | ] |
818 | 822 | }, |
819 | 823 | { |
|
857 | 861 | ], |
858 | 862 | "source": [ |
859 | 863 | "# Muž vs. Žena\n", |
860 | | - "f,ax=plt.subplots(figsize=(5,5))\n", |
861 | | - "sns.countplot(x='Sex',data=df_titanic,ax=ax)\n", |
862 | | - "ax.set_title('Muž vs. Žena')\n", |
863 | | - "plt.show()" |
| 864 | + "\n", |
| 865 | + "# f,ax=plt.subplots(figsize=(5,5))\n", |
| 866 | + "# sns.countplot(x='Sex',data=df_titanic,ax=ax)\n", |
| 867 | + "# ax.set_title('Muž vs. Žena')\n", |
| 868 | + "# plt.show()" |
864 | 869 | ] |
865 | 870 | }, |
866 | 871 | { |
|
928 | 933 | ], |
929 | 934 | "source": [ |
930 | 935 | "# míra přežití žen/mužů\n", |
931 | | - "df_titanic.groupby(['Sex'], as_index=False).agg({'Survived':'mean'})" |
| 936 | + "\n", |
| 937 | + "# df_titanic.groupby(['Sex'], as_index=False).agg({'Survived':'mean'})" |
932 | 938 | ] |
933 | 939 | }, |
934 | 940 | { |
|
961 | 967 | ], |
962 | 968 | "source": [ |
963 | 969 | "#Zhynul vs. přežil pro muže/ženu\n", |
964 | | - "f,ax=plt.subplots(figsize=(5,5))\n", |
965 | | - "sns.countplot(x='Sex',hue='Survived',data=df_titanic,ax=ax)\n", |
966 | | - "ax.set_title('Pohlaví: Zahynulý vs. Přežil')\n", |
967 | | - "plt.show()" |
| 970 | + "\n", |
| 971 | + "# f,ax=plt.subplots(figsize=(5,5))\n", |
| 972 | + "# sns.countplot(x='Sex',hue='Survived',data=df_titanic,ax=ax)\n", |
| 973 | + "# ax.set_title('Pohlaví: Zahynulý vs. Přežil')\n", |
| 974 | + "# plt.show()" |
968 | 975 | ] |
969 | 976 | }, |
970 | 977 | { |
|
1045 | 1052 | } |
1046 | 1053 | ], |
1047 | 1054 | "source": [ |
1048 | | - "df_titanic.groupby(['Pclass'], as_index=False).agg({'Survived':'mean'})" |
| 1055 | + "# df_titanic.groupby(['Pclass'], as_index=False).agg({'Survived':'mean'})" |
1049 | 1056 | ] |
1050 | 1057 | }, |
1051 | 1058 | { |
|
1077 | 1084 | ], |
1078 | 1085 | "source": [ |
1079 | 1086 | "# sloupcový graf pomocí seaborn countplot\n", |
1080 | | - "f,ax=plt.subplots(figsize=(5,5))\n", |
1081 | | - "sns.countplot(x='Pclass',hue='Survived',data=df_titanic,ax=ax)\n", |
1082 | | - "ax.set_title('Pclass: Zahynulý vs. Přežil')\n", |
1083 | | - "plt.show()" |
| 1087 | + "\n", |
| 1088 | + "# f,ax=plt.subplots(figsize=(5,5))\n", |
| 1089 | + "# sns.countplot(x='Pclass',hue='Survived',data=df_titanic,ax=ax)\n", |
| 1090 | + "# ax.set_title('Pclass: Zahynulý vs. Přežil')\n", |
| 1091 | + "# plt.show()" |
1084 | 1092 | ] |
1085 | 1093 | }, |
1086 | 1094 | { |
|
1139 | 1147 | ], |
1140 | 1148 | "source": [ |
1141 | 1149 | "# použít dataframe hist(), který bude standardně zpracovávat NaN\n", |
1142 | | - "obr, ax = plt.subplots()\n", |
1143 | | - "df_titanic.Age.hist(ax=ax, bins=20, edgecolor='black', alpha=0.5)" |
| 1150 | + "\n", |
| 1151 | + "# obr, ax = plt.subplots()\n", |
| 1152 | + "# df_titanic.Age.hist(ax=ax, bins=20, edgecolor='black', alpha=0.5)" |
1144 | 1153 | ] |
1145 | 1154 | }, |
1146 | 1155 | { |
|
1183 | 1192 | ], |
1184 | 1193 | "source": [ |
1185 | 1194 | "# dataframe.hist(), který bude standardně zpracovávat NaN\n", |
1186 | | - "obr, ax = plt.subplots()\n", |
1187 | | - "df_titanic.Age.hist(ax=ax, label='all', bins=20, edgecolor='black', alpha=0.5)\n", |
1188 | | - "# stack přežil\n", |
1189 | | - "df_titanic[df_titanic.Survived==1].Age.hist(ax=ax, bins=20, color='g', label='survived', edgecolor='black', alpha=0.5)\n", |
1190 | | - "ax.set_title('Věková distribuce')\n", |
1191 | | - "ax.legend()" |
| 1195 | + "\n", |
| 1196 | + "# obr, ax = plt.subplots()\n", |
| 1197 | + "# df_titanic.Age.hist(ax=ax, label='all', bins=20, edgecolor='black', alpha=0.5)\n", |
| 1198 | + "# # stack přežil\n", |
| 1199 | + "# df_titanic[df_titanic.Survived==1].Age.hist(ax=ax, bins=20, color='g', label='survived', edgecolor='black', alpha=0.5)\n", |
| 1200 | + "# ax.set_title('Věková distribuce')\n", |
| 1201 | + "# ax.legend()" |
1192 | 1202 | ] |
1193 | 1203 | }, |
1194 | 1204 | { |
|
1239 | 1249 | } |
1240 | 1250 | ], |
1241 | 1251 | "source": [ |
1242 | | - "#check all missing data\n", |
1243 | | - "df_titanic.isnull().sum()" |
| 1252 | + "# check all missing data\n", |
| 1253 | + "\n", |
| 1254 | + "# df_titanic.isnull().sum()" |
1244 | 1255 | ] |
1245 | 1256 | }, |
1246 | 1257 | { |
|
1292 | 1303 | ], |
1293 | 1304 | "source": [ |
1294 | 1305 | "# naplnit NaN v Embarked s mode()\n", |
1295 | | - "df_titanic['Embarked'].fillna(df_titanic.Embarked.mode()[0],inplace=True)\n", |
1296 | | - "df_titanic.info()" |
| 1306 | + "\n", |
| 1307 | + "# df_titanic['Embarked'].fillna(df_titanic.Embarked.mode()[0],inplace=True)\n", |
| 1308 | + "# df_titanic.info()" |
1297 | 1309 | ] |
1298 | 1310 | }, |
1299 | 1311 | { |
|
1469 | 1481 | ], |
1470 | 1482 | "source": [ |
1471 | 1483 | "# extrahovat titul (oslovení) ze jména\n", |
1472 | | - "df_titanic['Title']=df_titanic.Name.str.extract('([A-Za-z]+\\.)')\n", |
1473 | | - "df_titanic.head()" |
| 1484 | + "\n", |
| 1485 | + "# df_titanic['Title']=df_titanic.Name.str.extract('([A-Za-z]+\\.)')\n", |
| 1486 | + "# df_titanic.head()" |
1474 | 1487 | ] |
1475 | 1488 | }, |
1476 | 1489 | { |
|
1638 | 1651 | } |
1639 | 1652 | ], |
1640 | 1653 | "source": [ |
1641 | | - "df_titanic.Title = df_titanic.Title.str.upper()\n", |
1642 | | - "df_titanic.head()" |
| 1654 | + "# df_titanic.Title = df_titanic.Title.str.upper()\n", |
| 1655 | + "# df_titanic.head()" |
1643 | 1656 | ] |
1644 | 1657 | }, |
1645 | 1658 | { |
|
1683 | 1696 | } |
1684 | 1697 | ], |
1685 | 1698 | "source": [ |
1686 | | - "df_titanic.Title.value_counts()" |
| 1699 | + "# df_titanic.Title.value_counts()" |
1687 | 1700 | ] |
1688 | 1701 | }, |
1689 | 1702 | { |
|
1717 | 1730 | } |
1718 | 1731 | ], |
1719 | 1732 | "source": [ |
1720 | | - "df_titanic.Age.fillna(df_titanic.groupby('Title').Age.transform('mean'), inplace=True)\n", |
1721 | | - "df_titanic.info()" |
| 1733 | + "# df_titanic.Age.fillna(df_titanic.groupby('Title').Age.transform('mean'), inplace=True)\n", |
| 1734 | + "# df_titanic.info()" |
1722 | 1735 | ] |
1723 | 1736 | }, |
1724 | 1737 | { |
|
1869 | 1882 | } |
1870 | 1883 | ], |
1871 | 1884 | "source": [ |
1872 | | - "df_titanic[df_titanic.Name.str.contains('Asplund')]" |
| 1885 | + "# df_titanic[df_titanic.Name.str.contains('Asplund')]" |
1873 | 1886 | ] |
1874 | 1887 | }, |
1875 | 1888 | { |
|
2043 | 2056 | } |
2044 | 2057 | ], |
2045 | 2058 | "source": [ |
2046 | | - "df_titanic['FamilySize'] = df_titanic.Parch + df_titanic.SibSp + 1\n", |
2047 | | - "df_titanic.sample(5)" |
| 2059 | + "# df_titanic['FamilySize'] = df_titanic.Parch + df_titanic.SibSp + 1\n", |
| 2060 | + "# df_titanic.sample(5)" |
2048 | 2061 | ] |
2049 | 2062 | }, |
2050 | 2063 | { |
|
2187 | 2200 | } |
2188 | 2201 | ], |
2189 | 2202 | "source": [ |
2190 | | - "import statsmodels.formula.api as smf\n", |
2191 | | - "result = smf.ols(\"Fare ~ C(Pclass) + C(Embarked) + FamilySize\", data=df_titanic).fit()\n", |
2192 | | - "result.summary()" |
| 2203 | + "# import statsmodels.formula.api as smf\n", |
| 2204 | + "# result = smf.ols(\"Fare ~ C(Pclass) + C(Embarked) + FamilySize\", data=df_titanic).fit()\n", |
| 2205 | + "# result.summary()" |
2193 | 2206 | ] |
2194 | 2207 | } |
2195 | 2208 | ], |
|
0 commit comments