
Commit d9fbceb

Ap-253: update rubyzip to 3.0.0 (#20)
* AP-253: large zip, workflow information
* add small raster.zip
* rm raster.zip and test
* Add smaller raster zip file and update geoserver spec
1 parent 25a981f commit d9fbceb

9 files changed: +160 -100 lines changed


Gemfile

Lines changed: 1 addition & 1 deletion
@@ -7,7 +7,7 @@ gem 'faraday-net_http_persistent', '~> 2.0'
 gem 'geo_combine'
 gem 'geoserver-publish', '~> 0.7.0'
 gem 'rsolr'
-gem 'rubyzip'
+gem 'rubyzip', '3.0.0.alpha'
 gem "berkeley_library-docker", "~> 0.2.0"
 gem "listen", "~> 3.8"
 gem 'uri'

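Why the pin matters: rubyzip 3.x replaces the positional destination path of Zip::Entry#extract with a destination_directory: keyword, which the lib/gingr/data_handler.rb change below relies on. A minimal sketch of the new call, assuming rubyzip 3.0.0.alpha; the archive and target paths are placeholders:

    require 'zip'

    # Extract every entry of example.zip into /tmp/unpacked (placeholder paths).
    Zip::File.open('example.zip') do |zip|
      zip.each do |entry|
        # rubyzip 3.x takes the destination as a keyword argument; the block
        # returning true tells it to overwrite a file that already exists.
        entry.extract(destination_directory: '/tmp/unpacked') { true }
      end
    end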
Gemfile.lock

Lines changed: 2 additions & 2 deletions
@@ -162,7 +162,7 @@ GEM
       rspec-support (~> 3.12.0)
     rspec-support (3.12.1)
     ruby2_keywords (0.0.5)
-    rubyzip (2.3.2)
+    rubyzip (3.0.0.alpha)
     sanitize (6.1.0)
       crass (~> 1.0.2)
       nokogiri (>= 1.12.0)
@@ -189,7 +189,7 @@ DEPENDENCIES
   pry (~> 0.14.2)
   rsolr
   rspec (~> 3.12)
-  rubyzip
+  rubyzip (= 3.0.0.alpha)
   uri
 
 RUBY VERSION

docker-compose.yml

Lines changed: 2 additions & 2 deletions
@@ -39,14 +39,14 @@ services:
       - ./data/spatial:/usr/local/apache2/htdocs/:ro
 
   geoserver:
-    image: containers.lib.berkeley.edu/gis/geoserver/v2.23.2
+    image: containers.lib.berkeley.edu/gis/geoserver:latest
     ports:
       - 8080:8080
     volumes:
       - ./data/geoserver/public:/srv/geofiles:delegated
 
   geoserver-secure:
-    image: containers.lib.berkeley.edu/gis/geoserver/v2.23.2
+    image: containers.lib.berkeley.edu/gis/geoserver:latest
    ports:
       - 8081:8080
     volumes:

lib/gingr/cli.rb

Lines changed: 29 additions & 10 deletions
@@ -80,13 +80,7 @@ def geoserver(filename)
     option :geoserver_root
     def unpack(zipfile)
       zipfile_path = zipfile == File.basename(zipfile) ? File.join(ImportUtil.root_path, 'import', zipfile) : zipfile
-      DataHandler.spatial_root = options[:spatial_root] || ENV.fetch('SPATIAL_ROOT',
-                                                                     Config.default_options[:spatial_root])
-      DataHandler.geoserver_root = options[:geoserver_root] || ENV.fetch('GEOSERVER_ROOT',
-                                                                         Config.default_options[:geoserver_root])
-
-      gingr_watch_root_dir ||= ENV['GINGR_WATCH_DIRECTORY'] || '/opt/app/data/gingr'
-      DataHandler.processing_root = File.join(gingr_watch_root_dir, 'processing')
+      set_data_handler(options[:spatial_root], options[:geoserver_root])
       DataHandler.extract_and_move(zipfile_path)
     end
 
@@ -107,12 +101,13 @@ def unpack(zipfile)
     option :geoserver_secure_url
     def all(zipfile)
       unpacked = unpack(zipfile)
-      solr(unpacked[:extract_to_path])
+      total_indexed = solr(unpacked[:extract_to_path])
 
       geofile_names = unpacked[:geofile_name_hash]
       geoserver_urls = options.slice(:geoserver_url, :geoserver_secure_url).transform_keys(&:to_sym)
-      Gingr::GeoserverPublisher.publish_inventory(geofile_names, **geoserver_urls)
-      logger.info("#{zipfile} - all imported")
+      failed_files = Gingr::GeoserverPublisher.publish_inventory(geofile_names, **geoserver_urls)
+
+      report(total_indexed, failed_files, zipfile)
     end
 
     desc 'geoserver_workspace', 'create a workspace in a geoserver'
@@ -126,5 +121,29 @@ def geoserver_workspace(workspace_name = nil)
       publisher = GeoserverPublisher.new(options[:geoserver_url], default:, workspace_name:)
       publisher.create_workspace
     end
+
+    private
+
+    def set_data_handler(spatial_root, goserver_root)
+      DataHandler.spatial_root = spatial_root || ENV.fetch('SPATIAL_ROOT',
+                                                           Config.default_options[:spatial_root])
+      DataHandler.geoserver_root = goserver_root || ENV.fetch('GEOSERVER_ROOT',
+                                                              Config.default_options[:geoserver_root])
+      gingr_watch_root_dir ||= ENV['GINGR_WATCH_DIRECTORY'] || '/opt/app/data/gingr'
+      DataHandler.processing_root = File.join(gingr_watch_root_dir, 'processing')
+    end
+
+    def report(total_indexed, failed_files, zipfile)
+      if total_indexed.nil?
+        logger.error('Solr indexing failed')
+        logger.info("#{zipfile} - not imported")
+        return
+      end
+      logger.info("#{zipfile} - all imported, total records: #{total_indexed}")
+      return if failed_files.empty?
+
+      logger.warn("#{zipfile} - some shapefile or GeoTIFF files not published to Geoservers")
+      logger.error("Failed to published geo files: #{failed_files.join('; ')}")
+    end
   end
 end

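To exercise the reworked all task by hand, something like the following should work, assuming the Thor class is Gingr::Cli and is loadable as gingr/cli (both inferred from the file path, not confirmed here); the zip name and GeoServer URLs are placeholders:

    require 'gingr/cli'

    # Placeholder arguments; the switch names mirror the `option` declarations above.
    Gingr::Cli.start([
      'all', 'vector_sample.zip',
      '--geoserver_url=http://admin:geoserver@localhost:8080/geoserver/rest/',
      '--geoserver_secure_url=http://admin:geoserver@localhost:8081/geoserver/rest/'
    ])

With the changes above, a nil return from solr logs "Solr indexing failed", and any paths returned by publish_inventory are reported as unpublished geo files.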
lib/gingr/data_handler.rb

Lines changed: 82 additions & 60 deletions
@@ -20,51 +20,97 @@ class << self
       attr_accessor :spatial_root, :geoserver_root, :processing_root
 
       def extract_and_move(zip_file)
-        extract_to_path = extract_zipfile(zip_file)
+        extract_to_path = perform_extraction(zip_file)
+        summary = prepare_publishing_files(extract_to_path)
+        geofile_name_hash = analyze_summary(summary)
+        { extract_to_path:, geofile_name_hash: }
+      end
+
+      private
+
+      def analyze_summary(summary)
+        public_map_files = []
+        ucb_map_files = []
+        summary.each do |summ|
+          filename = summ[:map_filename]
+          summ[:public_access] ? public_map_files << filename : ucb_map_files << filename
+        end
+        { public_files: public_map_files.compact.reject(&:empty?), ucb_files: ucb_map_files.compact.reject(&:empty?) }
+      end
+
+      # Extacting ingestion zip file to processing directory
+      def perform_extraction(zip_file)
+        extract_to_path = prepare_extract_to_path(zip_file)
+        extract_zipfile(zip_file)
+        extract_to_path
+      end
 
-        geofile_ingestion_dir_path = move_files(extract_to_path)
-        { extract_to_path:, geofile_name_hash: get_geofile_name_hash(geofile_ingestion_dir_path) }
+      def prepare_extract_to_path(zip_file)
+        dir_name = File.basename(zip_file, '.*')
+        extract_to_path = File.join(@processing_root, dir_name)
+        clr_directory(extract_to_path)
+        extract_to_path
+      end
+
+      # Moving files to Geoserver and spatial server
+      def prepare_publishing_files(extract_to_path)
+        from_geofile_ingestion_path = File.join(extract_to_path, Config.geofile_ingestion_dirname)
+        subdirectory_list(from_geofile_ingestion_path).map { |dir| move_a_record(dir) }
+      rescue StandardError => e
+        logger.error "An error occurred while extracting and moving files from #{from_geofile_ingestion_path}: #{e.message}"
       end
 
       def extract_zipfile(zip_file, to_dir = @processing_root)
-        extracted_to_path = clr_subdirectory(zip_file)
         Zip::File.open(zip_file) do |zip|
           zip.each do |entry|
-            entry_path = File.join(to_dir, entry.name)
-            entry.extract(entry_path) { true }
+            entry.extract(destination_directory: to_dir) { true }
           end
         end
-        extracted_to_path
+      rescue StandardError => e
+        logger.error "An unexpected error occurred during unzip #{zip_file}: #{e.message}"
+        raise
       end
+
+      # some records may no have a map.zip files
+      def move_a_record(dir_path)
+        attributes = record_attributes(dir_path)
+        arkid = File.basename(dir_path).strip
+        map_filename = nil
+        souredata_moved = false
 
-      def move_files(from_dir_path)
-        geofile_ingestion_dir_path = File.join(from_dir_path, Config.geofile_ingestion_dirname)
-        subdirectory_list(geofile_ingestion_dir_path).each do |subdirectory_path|
-          move_a_record(subdirectory_path)
+        subfile_list(dir_path).each do |file|
+          filename = File.basename(file)
+          map_filename = move_map_file(file, arkid, attributes) if filename == 'map.zip'
+          souredata_moved = move_source_file(file, arkid, attributes[:public_access]) if filename == 'data.zip'
         end
-        geofile_ingestion_dir_path
+        logger.warning " '#{arkid} has no map.zip file, please check" if map_filename.nil?
+        logger.warning " '#{arkid} has no data.zip file, please check" unless souredata_moved
+        { public_access: attributes[:public_access], map_filename: }
       end
 
-      def move_a_record(dir_path)
-        subfile_list(dir_path).each do |file|
-          if File.basename(file) == 'map.zip'
-            dest_dir_path = file_path(dir_path, @geoserver_root)
-            unzip_map_files(dest_dir_path, file)
-          else
-            dest_dir_path = file_path(dir_path, @spatial_root)
-            mv_spatial_file(dest_dir_path, file)
-          end
-        end
+      def move_map_file(file, arkid, attributes)
+        dest_dir_path = file_path(@geoserver_root, arkid, attributes[:public_access])
+        unzip_map_files(dest_dir_path, file)
+        format = attributes[:format].downcase
+        ext = format == 'shapefile' ? '.shp' : '.tif'
+        "#{arkid}#{ext}"
+      rescue StandardError => e
+        logger.error "Failed to move map file '#{file}' for arkid '#{arkid}': #{e.message}"
+        ''
       end
 
-      # remove the subdirectory if it exists
-      def clr_subdirectory(zip_file)
-        subdir_name = File.basename(zip_file, '.*')
-        subdir_path = File.join(@processing_root, subdir_name)
-        FileUtils.rm_r(subdir_path) if File.directory? subdir_path
-        subdir_path
+      def move_source_file(file, arkid, public_access)
+        dest_dir_path = file_path(@spatial_root, arkid, public_access)
+        mv_spatial_file(dest_dir_path, file)
+        true
+      rescue StandardError => e
+        logger.error "Failed to move soucedata '#{file}' for '#{arkid}': #{e.message}"
+      end
+
+      def clr_directory(directory_name)
+        FileUtils.rm_r(directory_name) if File.directory? directory_name
       rescue Errno::EACCES
-        logger.error("Permission denied: #{subdir_path}")
+        logger.error("Permission denied to clear #{directory_name}")
        raise
      end
 
@@ -76,37 +122,13 @@ def subfile_list(directory_path)
         Pathname(directory_path).children.select(&:file?)
       end
 
-      def get_geofile_name_hash(directory_path)
-        public_names = []
-        ucb_names = []
-        subdirectory_list(directory_path).each do |sub_dir|
-          hash = name_access_hash(sub_dir)
-          hash[:public_access] ? public_names << hash[:name] : ucb_names << hash[:name]
-        end
-        { public: public_names, ucb: ucb_names }
-      end
-
-      def access_type(dir)
-        json_hash = geoblacklight_hash(dir)
-        value = json_hash['dct_accessRights_s'].downcase
-        value == 'public' ? 'public' : 'UCB'
-      end
-
-      private
-
-      def geoblacklight_hash(dir)
+      def record_attributes(dir)
         json_filepath = File.join(dir, 'geoblacklight.json')
         json_data = File.read(json_filepath)
-        JSON.parse(json_data)
-      end
-
-      def name_access_hash(dir)
-        basename = File.basename(dir).split('_').last
-        json_hash = geoblacklight_hash(dir)
+        json_hash = JSON.parse(json_data)
+        public_access = json_hash['dct_accessRights_s'].downcase == 'public'
         format = json_hash['dct_format_s'].downcase
-        ext = format == 'shapefile' ? '.shp' : '.tif'
-        access_right = json_hash['dct_accessRights_s'].downcase
-        { name: "#{basename}#{ext}", public_access: access_right == 'public' }
+        { public_access:, format: }
       end
 
       def unzip_map_files(dest_dir, map_zipfile)
@@ -120,12 +142,12 @@ def mv_spatial_file(dest_dir, file)
         FileUtils.cp(file, to_file)
       end
 
-      def file_path(dir_path, root)
+      def file_path(root, arkid, public_access )
         # geofiles/spatial/{UCB,public}/berkeley-{arkID}
-        arkid = File.basename(dir_path).strip
-        type = access_type(dir_path)
+        type = public_access ? 'public' : 'UCB'
         File.join(root, type, "berkeley-#{arkid}")
       end
+
     end
   end
 end

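The net effect of this refactor is a new contract between DataHandler.extract_and_move and the CLI: instead of :public/:ucb name lists it returns :public_files/:ucb_files derived from each record's map.zip and geoblacklight.json. A sketch of the returned structure; every value below is invented for illustration:

    {
      extract_to_path: '/opt/app/data/gingr/processing/vector_sample',
      geofile_name_hash: {
        public_files: ['s7038h.shp'],  # records whose dct_accessRights_s is 'public'
        ucb_files: ['s7039g.tif']      # everything else goes to the secure GeoServer
      }
    }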
lib/gingr/geoserver_publisher.rb

Lines changed: 24 additions & 15 deletions
@@ -11,21 +11,24 @@ class GeoserverPublisher
     DEFAULT_REMOTE_ROOT = '/srv/geofiles'
     DEFAULT_WORKSPACE = 'UCB'
 
-    attr_reader :connection
-    attr_reader :remote_root
-    attr_reader :workspace_name
+    attr_reader :connection, :remote_root, :workspace_name
 
     class << self
       def publish_inventory(inventory, geoserver_url: nil, geoserver_secure_url: nil)
-        if !inventory[:public].empty?
+        public_files = inventory[:public_files]
+        ucb_files = inventory[:ucb_files]
+        un_published_shapefiles = []
+        un_published_geotiffs = []
+        unless public_files.empty?
           public_publisher = new(geoserver_url)
-          public_publisher.batch_publish(inventory[:public])
+          un_published_shapefiles = public_publisher.batch_publish(public_files)
         end
 
-        if !inventory[:ucb].empty?
+        unless ucb_files.empty?
           secure_publisher = new(geoserver_secure_url, default: :geoserver_secure_url)
-          secure_publisher.batch_publish(inventory[:ucb])
+          un_published_geotiffs = secure_publisher.batch_publish(ucb_files)
         end
+        (un_published_shapefiles + un_published_geotiffs).compact
       end
 
       def parse_connection_string(geoserver_baseurl)
@@ -39,7 +42,7 @@ def parse_connection_string(geoserver_baseurl)
           port: uri.port == uri.default_port ? nil : uri.port,
           path: uri.path,
           fragment: uri.fragment,
-          query: uri.query,
+          query: uri.query
         ).to_s, uri.user, uri.password
       end
     end
@@ -63,17 +66,15 @@ def initialize(conn = nil, default: nil, remote_root: nil, workspace_name: nil)
     end
 
     def batch_publish(filenames)
-      filenames.each(&method(:publish))
+      filenames.map(&method(:publish))
     end
-
+
     def publish(filename)
       id = File.basename(filename, '.*')
       file_path = remote_filepath(id, filename)
-      if File.extname(filename).casecmp?('.shp')
-        publish_shapefile(file_path, id)
-      else
-        publish_geotiff(file_path, id)
-      end
+      return publish_shapefile(file_path, id) if File.extname(filename).casecmp?('.shp')
+
+      publish_geotiff(file_path, id)
     end
 
     def create_workspace
@@ -92,11 +93,19 @@ def create_workspace
     def publish_shapefile(file_path, id)
       logger.debug("Publishing shapefile #{id} to #{geoserver_url}")
       Geoserver::Publish.shapefile(connection:, workspace_name:, file_path:, id:, title: id)
+      nil
+    rescue StandardError => e
+      logger.error("Error publishing shapefile #{file_path} to #{geoserver_url}: #{e.message}")
+      file_path
     end
 
     def publish_geotiff(file_path, id)
       logger.debug("Publishing geotiff #{id} to #{geoserver_url}")
       Geoserver::Publish.geotiff(connection:, workspace_name:, file_path:, id:, title: id)
+      nil
+    rescue StandardError => e
+      logger.error("Error publishing GeoTIFF #{file_path} to #{geoserver_url}: #{e.message}")
+      file_path
     end
 
     def remote_filepath(id, filename)

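Together with the CLI change, publish_inventory now surfaces partial failures instead of only logging success. A usage sketch, assuming the inventory hash shape shown in data_handler.rb; the require path, URL, and filenames are placeholders:

    require 'gingr/geoserver_publisher'

    failed = Gingr::GeoserverPublisher.publish_inventory(
      { public_files: ['s7038h.shp'], ucb_files: [] },
      geoserver_url: 'http://admin:geoserver@localhost:8080/geoserver/rest/'
    )

    # An empty array means every file was published; otherwise the CLI's report
    # helper logs the returned remote paths as unpublished.
    puts failed.empty? ? 'all published' : "failed: #{failed.join('; ')}"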
0 commit comments
