Skip to content

Commit ada8e60

Browse files
Finish refactor
* Add a progressbar
* Fix a bug when looking up state_id
* Add a ‘name’ index on states table
* Add a switch to generate individual csv files
* Refactor a few more things
1 parent ba54bc6 commit ada8e60

11 files changed

Lines changed: 127 additions & 28 deletions

.rubocop.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ CommentAnnotation:
3131
########################################
3232
# Style Cops
3333

34+
Style/ClassVars:
35+
Enabled: false
36+
3437
Style/Documentation:
3538
Enabled: false
3639

Gemfile.lock

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ PATH
44
free_zipcode_data (1.0.0)
55
colored (~> 1.2)
66
kiba (~> 2.0)
7+
ruby-progressbar (~> 1.9)
78
rubyzip (~> 1.2)
89
sqlite3 (~> 1.3)
910
trollop (~> 2.1)
@@ -51,6 +52,7 @@ GEM
5152
rainbow (>= 2.2.2, < 4.0)
5253
ruby-progressbar (~> 1.7)
5354
unicode-display_width (~> 1.0, >= 1.0.1)
55+
ruby-prof (0.17.0)
5456
ruby-progressbar (1.9.0)
5557
rubyzip (1.2.1)
5658
simplecov (0.16.1)
@@ -73,6 +75,7 @@ DEPENDENCIES
7375
rake (~> 12.0)
7476
rspec (~> 3.7)
7577
rubocop
78+
ruby-prof
7679
simplecov
7780

7881
BUNDLED WITH

free_zipcode_data.gemspec

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@ lib = File.expand_path('../lib', __FILE__)
55
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
66
require 'free_zipcode_data/version'
77

8+
# rubocop:disable Metrics/BlockLength
89
Gem::Specification.new do |spec|
910
spec.name = 'free_zipcode_data'
1011
spec.version = FreeZipcodeData::VERSION
1112
spec.authors = ['Chris Blackburn', 'Chris McKnight']
12-
spec.email = ['87a1779b@opayq.com']
13+
spec.email = ['87a1779b@opayq.com', 'fixme@mcknight.bogus']
1314
spec.summary = 'Free US postal codes in CSV and SQLite3 format.'
1415
spec.description = spec.summary
1516
spec.homepage = 'https://github.com/midwire/free_zipcode_data'
@@ -26,11 +27,14 @@ Gem::Specification.new do |spec|
2627
spec.add_development_dependency 'rake', '~> 12.0'
2728
spec.add_development_dependency 'rspec', '~> 3.7'
2829
spec.add_development_dependency 'rubocop'
30+
spec.add_development_dependency 'ruby-prof'
2931
spec.add_development_dependency 'simplecov'
3032

3133
spec.add_runtime_dependency 'colored', '~> 1.2'
3234
spec.add_runtime_dependency 'kiba', '~> 2.0'
35+
spec.add_runtime_dependency 'ruby-progressbar', '~> 1.9'
3336
spec.add_runtime_dependency 'rubyzip', '~> 1.2'
3437
spec.add_runtime_dependency 'sqlite3', '~> 1.3'
3538
spec.add_runtime_dependency 'trollop', '~> 2.1'
3639
end
40+
# rubocop:enable Metrics/BlockLength

lib/etl/free_zipcode_data_job.rb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,9 @@ def setup(country_file, database, logger, options)
3030
database: database,
3131
tablename: options[:zipcode_tablename]
3232

33+
post_process do
34+
logger.verbose('Finished generating table data...')
35+
end
3336
end
3437
end
3538
end

lib/free_zipcode_data/country_table.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def write(row)
3939
rescue SQLite3::ConstraintException
4040
# Swallow duplicates
4141
end
42+
43+
update_progress
4244
end
4345
end
4446
end

lib/free_zipcode_data/county_table.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ def build
2525

2626
def write(row)
2727
return nil unless row[:county]
28-
state_id = get_state_id(row[:short_state])
29-
raise "Could not find state: #{row[:short_state]}" unless state_id
28+
state_id = get_state_id(row[:short_state], row[:state])
29+
return nil unless state_id
30+
3031
sql = <<-SQL
3132
INSERT INTO counties (state_id, abbr, name)
3233
VALUES ('#{state_id}',
@@ -42,6 +43,8 @@ def write(row)
4243
rescue StandardError => err
4344
raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
4445
end
46+
47+
update_progress
4548
end
4649
end
4750
end

lib/free_zipcode_data/db_table.rb

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
11
# frozen_string_literal: true
22

33
require 'yaml'
4+
require 'ruby-progressbar'
45

56
module FreeZipcodeData
67
class DbTable
78
ISSUE_URL = 'https://github.com/midwire/free_zipcode_data/issues/new'
89

910
attr_reader :database, :tablename
11+
@@progressbar = nil
1012

1113
# Stores the database handle and table name, and sets up the progress bar
# that is shared by every table object through a class variable.
#
# The bar is created only once (`||=`); instantiating further tables reuses
# the existing bar instead of replacing it with a fresh one.
#
# @param database the database handle; it must respond to `execute(sql)`,
#   which `select_first` calls
# @param tablename the name of the table this object manages
def initialize(database:, tablename:)
  @database = database
  @tablename = tablename
  @@progressbar ||= begin
    # SQL string literals take single quotes; a double-quoted token is
    # resolved as an identifier first by SQLite.
    lc = select_first("SELECT value FROM meta WHERE name = 'line_count'")
    # NOTE(review): the factor 4 presumably means one tick per input line for
    # each of the four tables (country, state, county, zipcode) - confirm.
    ProgressBar.create(total: lc.to_i * 4, format: '%t: |%B| %e')
  end
end
19+
20+
# Advances the progress bar held in the @@progressbar class variable by
# calling `increment` on it.
def update_progress
21+
@@progressbar.increment
1422
end
1523

1624
private
@@ -19,23 +27,28 @@ def country_lookup_table
1927
@country_lookup_table ||= YAML.load_file('country_lookup_table.yml')
2028
end
2129

22-
def get_country_id(country)
23-
rows = database.execute("SELECT id FROM countries WHERE alpha2 = '#{country}'")
30+
# Executes +sql+ and returns the first column of the first result row,
# or nil when the query returns no rows.
#
# A SQLite3::SQLException is re-raised as a RuntimeError whose message
# contains ISSUE_URL, the original error and the offending SQL.
def select_first(sql)
31+
rows = database.execute(sql)
2432
rows[0].nil? ? nil : rows[0].first
33+
rescue SQLite3::SQLException => err
34+
raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
2535
end
2636

27-
def get_state_id(state)
28-
rows = database.execute("SELECT id FROM states WHERE abbr = '#{state}'")
29-
rows[0].nil? ? nil : rows[0].first
37+
# Looks up the id of the `countries` row whose `alpha2` column equals
# +country+; returns whatever `select_first` returns for that query.
# NOTE(review): +country+ is interpolated into the SQL without escaping.
def get_country_id(country)
38+
sql = "SELECT id FROM countries WHERE alpha2 = '#{country}'"
39+
select_first(sql)
40+
end
41+
42+
# Looks up the id of a `states` row that matches either the abbreviation
# (+state_abbr+ against `abbr`) or the full name (+state_name+ against `name`);
# returns whatever `select_first` returns for that query.
# NOTE(review): +state_name+ goes through escape_single_quotes but
# +state_abbr+ is interpolated as-is.
def get_state_id(state_abbr, state_name)
43+
sql = "SELECT id FROM states
44+
WHERE abbr = '#{state_abbr}' OR name = '#{escape_single_quotes(state_name)}'"
45+
select_first(sql)
3046
end
3147

3248
# Looks up the id of the `counties` row whose `name` equals +county+
# (single quotes escaped via escape_single_quotes).
# Returns nil immediately when +county+ is nil; otherwise returns whatever
# `select_first` returns for the query.
def get_county_id(county)
3349
return nil if county.nil?
3450
sql = "SELECT id FROM counties WHERE name = '#{escape_single_quotes(county)}'"
35-
rows = database.execute(sql)
36-
rows[0].nil? ? nil : rows[0].first
37-
rescue SQLite3::SQLException => err
38-
raise "Please file an issue at #{ISSUE_URL}: [#{err}] -> SQL: [#{sql}]"
51+
select_first(sql)
3952
end
4053

4154
def escape_single_quotes(string)

lib/free_zipcode_data/runner.rb

Lines changed: 54 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66

77
require_relative '../etl/free_zipcode_data_job'
88

9-
require 'pry' if ENV.fetch('APP_ENV') == 'development'
9+
require 'pry' if ENV.fetch('APP_ENV', '') == 'development'
1010

1111
module FreeZipcodeData
12+
# rubocop:disable Metrics/ClassLength
1213
class Runner
1314
attr_accessor :logger, :options
1415

@@ -23,30 +24,38 @@ def initialize
2324

2425
# Runs the whole build, in this order:
#   1. parse the command-line arguments into the Options singleton and keep
#      its hash in @options
#   2. download the data source
#   3. open the work-dir database through SqliteRam and record the line
#      count in the meta table (configure_meta)
#   4. initialize the country, state, county and zipcode tables
#   5. run the ETL job
#   6. save the database to disk
#   7. dump the tables as .csv files when the generate_files option is set
#   8. log the number of processed zipcodes and the elapsed time (HH:MM:SS)
def start
2526
start_time = Time.now
26-
options = FreeZipcodeData::Options.instance
27-
options.initialize_hash(collect_args)
27+
opt = FreeZipcodeData::Options.instance
28+
opt.initialize_hash(collect_args)
29+
@options = opt.hash
2830

29-
logger.info('Starting FreeZipcodeData...'.green)
31+
logger.info("Starting FreeZipcodeData v#{VERSION}...".green)
3032

31-
datasource = DataSource.new(options.hash.country)
33+
datasource = DataSource.new(options.country)
3234
datasource.download
3335

34-
database = SqliteRam.new(File.join(options.hash.work_dir, 'free_zipcode_data.sqlite3'))
36+
db_file = File.join(options.work_dir, 'free_zipcode_data.sqlite3')
37+
database = SqliteRam.new(db_file)
38+
configure_meta(database.conn, datasource.datafile)
3539

3640
%i[country state county zipcode].each { |t| initialize_table(t, database) }
3741

3842
extract_transform_load(datasource, database)
3943

44+
logger.info("Saving database to disk '#{db_file}'...")
4045
database.save_to_disk
4146

42-
elapsed = Time.now - start_time
43-
logger.info("Finished in [#{elapsed}] seconds.".yellow)
47+
if options.generate_files
48+
logger.info('Generating .csv files...')
49+
database.dump_tables(options.work_dir)
50+
end
51+
52+
elapsed = Time.at(Time.now - start_time).utc.strftime('%H:%M:%S')
53+
logger.info("Processed #{datasource_line_count} zipcodes in [#{elapsed}].".yellow)
4454
end
4555

4656
private
4757

4858
def initialize_table(table_sym, database)
49-
options = Options.instance.hash
5059
tablename = options["#{table_sym}_tablename".to_sym]
5160
logger.verbose("Initializing #{table_sym} table: '#{tablename}'...")
5261
klass = instance_eval("#{titleize(table_sym)}Table", __FILE__, __LINE__)
@@ -57,12 +66,37 @@ def initialize_table(table_sym, database)
5766
table.build
5867
end
5968

69+
# Counts the lines in +filename+ and memoizes the result.
#
# The file is read only on the first call; the count is cached in
# @datasource_line_count. Later calls may therefore omit +filename+ (the
# summary log line in #start calls this method without an argument) and
# receive the cached value.
#
# @param filename [String, nil] path of the data file; required until a
#   count has been cached
# @return [Integer] number of lines in the data file
# @raise [ArgumentError] if no count is cached yet and +filename+ is nil
def datasource_line_count(filename = nil)
  @datasource_line_count ||= begin
    raise ArgumentError, 'filename is required for the first line count' if filename.nil?

    count = File.foreach(filename).count
    logger.verbose("Processing #{count} zipcodes in '#{filename}'...")
    count
  end
end
76+
77+
# Creates the `meta` table (id, name, value) and inserts one row named
# 'line_count' whose value is datasource_line_count(datasource).
#
# database   - object that receives `execute_batch` and `execute`
#              (#start passes `database.conn`)
# datasource - handed to datasource_line_count
#              (#start passes `datasource.datafile`)
def configure_meta(database, datasource)
78+
schema = <<-SQL
79+
create table meta (
80+
id integer not null primary key,
81+
name varchar(255),
82+
value varchar(255)
83+
)
84+
SQL
85+
database.execute_batch(schema)
86+
87+
sql = <<-SQL
88+
INSERT INTO meta (name, value)
89+
VALUES ('line_count', #{datasource_line_count(datasource)})
90+
SQL
91+
database.execute(sql)
92+
end
93+
6094
# Builds the ETL job with ETL::FreeZipcodeDataJob.setup - passing the data
# file, the database connection, the logger and the options - and runs it
# with Kiba.run.
def extract_transform_load(datasource, database)
6195
job = ETL::FreeZipcodeDataJob.setup(
6296
datasource.datafile,
6397
database.conn,
6498
logger,
65-
FreeZipcodeData::Options.instance.hash
99+
options
66100
)
67101
Kiba.run(job)
68102
end
@@ -71,15 +105,20 @@ def extract_transform_load(datasource, database)
71105
# rubocop:disable Metrics/MethodLength
72106
def collect_args
73107
Trollop.options do
108+
opt(
109+
:work_dir,
110+
'REQUIRED: Specify your work/build directory, where the SQLite and .csv files will be built',
111+
type: :string, required: true, short: '-w'
112+
)
74113
opt(
75114
:country,
76115
'Specify the country code for processing, or all countries if not specified',
77-
type: :string, required: false, short: '-g'
116+
type: :string, required: false, short: '-f'
78117
)
79118
opt(
80-
:work_dir,
81-
'Specify your work/build directory, where the SQLite and .csv files will be built',
82-
type: :string, required: true, short: '-w'
119+
:generate_files,
120+
'Generate CSV files: [counties.csv, states.csv, countries.csv, zipcodes.csv]',
121+
type: :boolean, required: false, short: '-g', default: false
83122
)
84123
opt(
85124
:country_tablename,
@@ -129,4 +168,5 @@ def titleize(string)
129168
ret
130169
end
131170
end
171+
# rubocop:enable Metrics/ClassLength
132172
end

lib/free_zipcode_data/sqlite_ram.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# frozen_string_literal: true
22

33
require 'sqlite3'
4+
require 'csv'
45

56
# Open a SQlite DB, work with it in-memory and save back to disk
67
class SqliteRam
@@ -18,4 +19,21 @@ def save_to_disk
1819
backup.step(-1)
1920
backup.finish
2021
end
22+
23+
# Writes every table listed in sqlite_master to "<path>/<table>.csv".
#
# Each file starts with a header row of column names (field 1 of each
# `pragma table_info` row), followed by one CSV row per table row.
#
# @param path [String] directory in which the .csv files are created
# @return [Array] the rows returned by the sqlite_master table listing
def dump_tables(path)
  # SQL string literals take single quotes; double quotes denote identifiers.
  tables = conn.execute("select name from sqlite_master where type = 'table'")
  tables.each do |table_array|
    table = table_array.first
    # Quote the name as an identifier so unusual table names stay valid SQL.
    quoted = %("#{table.gsub('"', '""')}")
    header = conn.execute("pragma table_info(#{quoted})").map { |column| column[1] }
    CSV.open(File.join(path, "#{table}.csv"), 'w') do |csv|
      csv << header
      conn.execute("select * from #{quoted}").each { |row| csv << row }
    end
  end
end
2139
end

lib/free_zipcode_data/state_table.rb

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ def build
2020
ON #{tablename} (abbr, country_id COLLATE NOCASE ASC);
2121
SQL
2222
database.execute_batch(ndx)
23+
24+
ndx = <<-SQL
25+
CREATE UNIQUE INDEX "main"."state_name"
26+
ON #{tablename} (name COLLATE NOCASE ASC);
27+
SQL
28+
database.execute_batch(ndx)
2329
end
2430

2531
def write(row)
@@ -38,6 +44,8 @@ def write(row)
3844
rescue SQLite3::ConstraintException
3945
# Swallow duplicates
4046
end
47+
48+
update_progress
4149
end
4250
end
4351
end

0 commit comments

Comments
 (0)