Skip to content

Commit 2cb2a1a

Browse files
authored
Merge pull request #1175 from geocompx/refactor_location
Refactor Chapter 14 to use z22 Census 2022 data
2 parents cc9f1c6 + 61c7c0e commit 2cb2a1a

7 files changed

Lines changed: 326 additions & 241 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,3 +17,6 @@ libs/
1717
geocompr2.rds
1818
*.gpkg
1919
figures/
20+
.claude/
21+
CLAUDE.md
22+
*_cache/

14-location.Rmd

Lines changed: 127 additions & 115 deletions
Large diffs are not rendered by default.

_14-ex.Rmd

Lines changed: 80 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -11,118 +11,116 @@ library(osmdata)
1111
library(spDataLarge)
1212
```
1313

14-
E1. Download the csv file containing inhabitant information for a 100 m cell resolution (https://www.zensus2011.de/SharedDocs/Downloads/DE/Pressemitteilung/DemografischeGrunddaten/csv_Bevoelkerung_100m_Gitter.zip?__blob=publicationFile&v=3).
15-
Please note that the unzipped file has a size of 1.23 GB.
16-
To read it into R, you can use `readr::read_csv`.
17-
This takes 30 seconds on a machine with 16 GB RAM.
18-
`data.table::fread()` might be even faster, and returns an object of class `data.table()`.
19-
Use `dplyr::as_tibble()` to convert it into a tibble.
20-
Build an inhabitant raster, aggregate it to a cell resolution of 1 km, and compare the difference with the inhabitant raster (`inh`) we have created using class mean values.
14+
E1. This exercise requires the **z22** package for accessing 100 m resolution data.
15+
Install it with `remotes::install_github("JsLth/z22")`.
16+
Load the population data at 100 m cell resolution using `z22::z22_data("population", res = "100m", year = 2022)`.
17+
Aggregate it to a cell resolution of 1 km using `terra::aggregate()` with `fun = sum`, and compare the result with the 1 km resolution data from `census_de`.
18+
Note that the 100 m data is much larger and may take some time to download.
2119

2220
```{r, 14-ex-e1, eval=FALSE}
2321
# Coarse inhabitant raster (1 km resolution)
2422
#*******************************************
2523
26-
# inhabitant raster (coarse resolution); this is one of the results of the
27-
# previous exercise
24+
# Load 1 km population data from spDataLarge
2825
data("census_de", package = "spDataLarge")
29-
input = select(census_de, x = x_mp_1km, y = y_mp_1km, pop = Einwohner,
30-
women = Frauen_A, mean_age = Alter_D, hh_size = HHGroesse_D)
31-
input_tidy = dplyr::mutate(input, dplyr::across(.fns = ~ifelse(. %in% c(-1, -9), NA, .)))
32-
input_ras = terra::rast(input_tidy, type = "xyz", crs = "EPSG:3035")
33-
inh_coarse = input_ras$pop
34-
# reclassify, i.e. convert the classes into inhabitant numbers using class means
35-
rcl = matrix(c(1, 1, 125, 2, 2, 375, 3, 3, 1250, 4, 4, 3000, 5, 5, 6000,
36-
6, 6, 8000), ncol = 3, byrow = TRUE)
37-
inh_coarse = terra::classify(inh_coarse, rcl = rcl, right = NA)
26+
pop_1km = census_de |>
27+
select(x, y, pop) |>
28+
mutate(pop = ifelse(pop < 0, NA, pop))
29+
inh_coarse = terra::rast(pop_1km, type = "xyz", crs = "EPSG:3035")
3830
3931
# Fine inhabitant raster (100 m resolution)
4032
#******************************************
41-
url =
42-
paste0("https://www.zensus2011.de/SharedDocs/Downloads/DE/Pressemitteilung/",
43-
"DemografischeGrunddaten/csv_Bevoelkerung_100m_Gitter.zip",
44-
"?__blob=publicationFile&v=3")
45-
# download fine raster
46-
download.file(url = url, destfile = file.path(tempdir(), "census.zip"),
47-
method = "auto", mode = "wb")
48-
# list the file names
49-
nms = unzip(file.path(tempdir(), "census.zip"), list = TRUE)
50-
# unzip only the csv file
51-
base_name = grep(".csv$", nms$Name, value = TRUE)
52-
unzip(file.path(tempdir(), "census.zip"), files = base_name, exdir = tempdir())
53-
# read in the csv file
54-
input = data.table::fread(file.path(tempdir(), base_name)) |>
55-
dplyr::as_tibble()
56-
input = select(input, x = starts_with("x_mp_1"),
57-
y = starts_with("y_mp_1"), inh = Einwohner)
58-
# set -1 and -9 to NA
59-
input = dplyr::mutate(input,
60-
dplyr::across(.fns = ~ifelse(. %in% c(-1, -9), NA, .)))
61-
# convert table into a raster (x and y are cell midpoints)
62-
inh_fine = terra::rast(input, type = "xyz", crs = "EPSG:3035")
63-
# Note that inh_fine contains the actual number of inhabitants per raster cell
64-
# instead of mean class values as was the case with its coarse 1km counterpart
33+
34+
# Load 100 m population data using z22 (this may take some time)
35+
pop_100m = z22::z22_data("population", res = "100m", year = 2022, as = "df")
36+
pop_100m = pop_100m |>
37+
rename(pop = cat_0) |>
38+
mutate(pop = ifelse(pop < 0, NA, pop))
39+
inh_fine = terra::rast(pop_100m, type = "xyz", crs = "EPSG:3035")
6540
6641
# Comparing the coarse with the fine raster
6742
#******************************************
6843
6944
# aggregate to the resolution of the coarse raster
70-
inh_fine = terra::aggregate(
71-
inh_fine, fact = terra::res(inh_coarse)[1] / terra::res(inh_fine)[1],
45+
inh_fine_agg = terra::aggregate(
46+
inh_fine, fact = terra::res(inh_coarse)[1] / terra::res(inh_fine)[1],
7247
fun = sum, na.rm = TRUE)
7348
# origin has to be the same
74-
terra::origin(inh_fine) = terra::origin(inh_coarse)
49+
terra::origin(inh_fine_agg) = terra::origin(inh_coarse)
7550
# make the comparison
76-
summary(inh_fine - inh_coarse)
77-
plot(inh_fine - inh_coarse)
78-
plot(abs(inh_fine - inh_coarse) > 1000)
79-
# the biggest deviations can be found in big cities like Berlin
80-
terra::global((abs(inh_fine - inh_coarse) > 1000), fun = "sum", na.rm = TRUE)
81-
# 18,121 cells have a deviation > 1000 inhabitants
82-
terra::global((abs(inh_fine - inh_coarse) > 5000), fun = "sum", na.rm = TRUE)
83-
# 338 cells have a deviation > 5000
51+
summary(inh_fine_agg - inh_coarse)
52+
plot(inh_fine_agg - inh_coarse)
53+
# Note: Since Census 2022 provides actual counts at both resolutions,
54+
# differences should be minimal (mainly due to rounding or edge effects)
55+
terra::global((abs(inh_fine_agg - inh_coarse) > 100), fun = "sum", na.rm = TRUE)
8456
```
8557

86-
E2. Suppose our bike shop predominantly sold electric bikes to older people.
58+
E2. Suppose our bike shop predominantly sold electric bikes to older people.
8759
Change the age raster accordingly, repeat the remaining analyses and compare the changes with our original result.
8860

8961
```{r, 14-ex-e2, eval=FALSE}
90-
# Here, we assume that you have already created `input_ras` in the first exercise.
62+
# Load data from spDataLarge
63+
data("census_de", package = "spDataLarge")
64+
65+
input_tidy = census_de |>
66+
mutate(across(c(pop, women, mean_age, hh_size), ~ifelse(.x < 0, NA, .x)))
67+
input_ras = terra::rast(input_tidy, type = "xyz", crs = "EPSG:3035")
68+
9169
# attach further necessary data
9270
data("metro_names", "shops", package = "spDataLarge")
9371
9472
# Basically, we are assuming that especially older people will use an electric
9573
# bike, therefore, we increase the weights for raster cells where predominantly
9674
# older people are living.
97-
rcl_pop = matrix(c(1, 1, 127, 2, 2, 375, 3, 3, 1250,
98-
4, 4, 3000, 5, 5, 6000, 6, 6, 8000),
99-
ncol = 3, byrow = TRUE)
100-
rcl_women = matrix(c(1, 1, 3, 2, 2, 2, 3, 3, 1, 4, 5, 0),
101-
ncol = 3, byrow = TRUE)
102-
# here we are giving the classes (3 to 5) containing the oldest people the
103-
# highest weight
104-
rcl_age = matrix(c(1, 1, 1, 2, 2, 1, 3, 5, 3),
105-
ncol = 3, byrow = TRUE)
106-
rcl_hh = rcl_women
107-
rcl = list(rcl_pop, rcl_women, rcl_age, rcl_hh)
108-
109-
reclass = input_ras
110-
for (i in 1:terra::nlyr(reclass)) {
75+
76+
# Reclassification matrices for continuous values
77+
rcl_women = matrix(c(
78+
0, 40, 3,
79+
40, 47, 2,
80+
47, 53, 1,
81+
53, 60, 0,
82+
60, 100, 0
83+
), ncol = 3, byrow = TRUE)
84+
85+
# For elderly electric bikes: give highest weights to older age groups
86+
rcl_age = matrix(c(
87+
0, 40, 0, # Young -> low weight
88+
40, 42, 0, # Young-ish -> low weight
89+
42, 44, 1, # Middle-aged -> some weight
90+
44, 47, 2, # Older -> higher weight
91+
47, 120, 3 # Elderly -> highest weight
92+
), ncol = 3, byrow = TRUE)
93+
94+
rcl_hh = matrix(c(
95+
0, 1.5, 3,
96+
1.5, 2.0, 2,
97+
2.0, 2.5, 1,
98+
2.5, 3.0, 0,
99+
3.0, 100, 0
100+
), ncol = 3, byrow = TRUE)
101+
102+
rcl = list(rcl_women, rcl_age, rcl_hh)
103+
104+
# Separate population (used as counts for metro detection) from variables to reclassify
105+
pop_ras = input_ras$pop
106+
demo_vars = c("women", "mean_age", "hh_size")
107+
reclass = input_ras[[demo_vars]]
108+
for (i in seq_len(terra::nlyr(reclass))) {
111109
reclass[[i]] = terra::classify(x = reclass[[i]], rcl = rcl[[i]], right = NA)
112110
}
113-
names(reclass) = names(input_ras)
111+
names(reclass) = demo_vars
114112
115-
# The rest of the analysis follows exactly the code presented in the book.
113+
# The rest of the analysis follows exactly the code presented in the book.
116114
117115
# Add metro names to metros sf object
118116
#************************************
119-
metro_names = dplyr::pull(metro_names, city) |>
117+
metro_names_vec = dplyr::pull(metro_names, city) |>
120118
as.character() |>
121119
{\(x) ifelse(x == "Velbert", "Düsseldorf", x)}() |>
122120
{\(x) gsub("ü", "ue", x)}()
123121
124-
pop_agg = terra::aggregate(reclass$pop, fact = 20, fun = sum, na.rm = TRUE)
125-
pop_agg = pop_agg[pop_agg > 500000, drop = FALSE]
122+
pop_agg = terra::aggregate(pop_ras, fact = 20, fun = sum, na.rm = TRUE)
123+
pop_agg = pop_agg[pop_agg > 500000, drop = FALSE]
126124
127125
polys = pop_agg |>
128126
terra::patches(directions = 8) |>
@@ -132,7 +130,7 @@ polys = pop_agg |>
132130
metros = polys |>
133131
dplyr::group_by(patches) |>
134132
dplyr::summarize()
135-
metros$metro_names = metro_names
133+
metros$metro_names = metro_names_vec
136134
137135
# Create shop/poi density raster
138136
#*******************************
@@ -142,23 +140,22 @@ poi = terra::rasterize(x = shops, y = reclass, field = "osm_id", fun = "length")
142140
# construct reclassification matrix
143141
int = classInt::classIntervals(values(poi), n = 4, style = "fisher")
144142
int = round(int$brks)
145-
rcl_poi = matrix(c(int[1], rep(int[-c(1, length(int))], each = 2),
143+
rcl_poi = matrix(c(int[1], rep(int[-c(1, length(int))], each = 2),
146144
int[length(int)] + 1), ncol = 2, byrow = TRUE)
147-
rcl_poi = cbind(rcl_poi, 0:3)
145+
rcl_poi = cbind(rcl_poi, 0:3)
148146
# reclassify
149-
poi = terra::classify(poi, rcl = rcl_poi, right = NA)
147+
poi = terra::classify(poi, rcl = rcl_poi, right = NA)
150148
names(poi) = "poi"
151-
# remove population raster and add poi raster
152-
reclass = reclass[[names(reclass) != "pop"]] |>
153-
c(poi)
149+
# add poi raster to demographic weights
150+
reclass = c(reclass, poi)
154151
155152
# Identify suitable locations
156153
#****************************
157154
# calculate the total score
158155
result = sum(reclass)
159156
160157
# have a look at suitable bike shop locations in Berlin
161-
berlin = metros[metro_names == "Berlin", ]
158+
berlin = metros[metros$metro_names == "Berlin", ]
162159
berlin_raster = terra::crop(result, berlin)
163160
# summary(berlin_raster)
164161
# berlin_raster
@@ -168,6 +165,6 @@ berlin_raster[berlin_raster == 0] = NA
168165
leaflet::leaflet() |>
169166
leaflet::addTiles() |>
170167
leaflet::addRasterImage(raster::raster(berlin_raster), colors = "darkgreen", opacity = 0.8) |>
171-
leaflet::addLegend("bottomright", colors = c("darkgreen"),
168+
leaflet::addLegend("bottomright", colors = c("darkgreen"),
172169
labels = c("potential locations"), title = "Legend")
173170
```

0 commit comments

Comments
 (0)