Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ gem 'faraday-net_http_persistent', '~> 2.0'
gem 'geo_combine'
gem 'geoserver-publish', '~> 0.7.0'
gem 'rsolr'
gem 'rubyzip'
gem 'rubyzip', '3.0.0.alpha'
gem "berkeley_library-docker", "~> 0.2.0"
gem "listen", "~> 3.8"
gem 'uri'
Expand Down
4 changes: 2 additions & 2 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ GEM
rspec-support (~> 3.12.0)
rspec-support (3.12.1)
ruby2_keywords (0.0.5)
rubyzip (2.3.2)
rubyzip (3.0.0.alpha)
sanitize (6.1.0)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
Expand All @@ -189,7 +189,7 @@ DEPENDENCIES
pry (~> 0.14.2)
rsolr
rspec (~> 3.12)
rubyzip
rubyzip (= 3.0.0.alpha)
uri

RUBY VERSION
Expand Down
4 changes: 2 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,14 @@ services:
- ./data/spatial:/usr/local/apache2/htdocs/:ro

geoserver:
image: containers.lib.berkeley.edu/gis/geoserver/v2.23.2
image: containers.lib.berkeley.edu/gis/geoserver:latest
ports:
- 8080:8080
volumes:
- ./data/geoserver/public:/srv/geofiles:delegated

geoserver-secure:
image: containers.lib.berkeley.edu/gis/geoserver/v2.23.2
image: containers.lib.berkeley.edu/gis/geoserver:latest
ports:
- 8081:8080
volumes:
Expand Down
39 changes: 29 additions & 10 deletions lib/gingr/cli.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,7 @@ def geoserver(filename)
option :geoserver_root
def unpack(zipfile)
zipfile_path = zipfile == File.basename(zipfile) ? File.join(ImportUtil.root_path, 'import', zipfile) : zipfile
DataHandler.spatial_root = options[:spatial_root] || ENV.fetch('SPATIAL_ROOT',
Config.default_options[:spatial_root])
DataHandler.geoserver_root = options[:geoserver_root] || ENV.fetch('GEOSERVER_ROOT',
Config.default_options[:geoserver_root])

gingr_watch_root_dir ||= ENV['GINGR_WATCH_DIRECTORY'] || '/opt/app/data/gingr'
DataHandler.processing_root = File.join(gingr_watch_root_dir, 'processing')
set_data_handler(options[:spatial_root], options[:geoserver_root])
DataHandler.extract_and_move(zipfile_path)
end

Expand All @@ -107,12 +101,13 @@ def unpack(zipfile)
option :geoserver_secure_url
def all(zipfile)
unpacked = unpack(zipfile)
solr(unpacked[:extract_to_path])
total_indexed = solr(unpacked[:extract_to_path])

geofile_names = unpacked[:geofile_name_hash]
geoserver_urls = options.slice(:geoserver_url, :geoserver_secure_url).transform_keys(&:to_sym)
Gingr::GeoserverPublisher.publish_inventory(geofile_names, **geoserver_urls)
logger.info("#{zipfile} - all imported")
failed_files = Gingr::GeoserverPublisher.publish_inventory(geofile_names, **geoserver_urls)

report(total_indexed, failed_files, zipfile)
end

desc 'geoserver_workspace', 'create a workspace in a geoserver'
Expand All @@ -126,5 +121,29 @@ def geoserver_workspace(workspace_name = nil)
publisher = GeoserverPublisher.new(options[:geoserver_url], default:, workspace_name:)
publisher.create_workspace
end

private

# Configure the DataHandler roots from CLI options, falling back first to
# environment variables and then to Config defaults.
#
# @param spatial_root [String, nil] override for the spatial files root
# @param geoserver_root [String, nil] override for the geoserver files root
def set_data_handler(spatial_root, geoserver_root)
  DataHandler.spatial_root = spatial_root || ENV.fetch('SPATIAL_ROOT',
                                                       Config.default_options[:spatial_root])
  DataHandler.geoserver_root = geoserver_root || ENV.fetch('GEOSERVER_ROOT',
                                                           Config.default_options[:geoserver_root])
  # Plain assignment: the original `||=` on a fresh local was always equivalent to `=`.
  watch_root = ENV['GINGR_WATCH_DIRECTORY'] || '/opt/app/data/gingr'
  DataHandler.processing_root = File.join(watch_root, 'processing')
end

# Log the outcome of an `all` import run.
#
# @param total_indexed [Integer, nil] number of Solr records indexed, or nil when indexing failed
# @param failed_files [Array<String>] geofile paths that could not be published to GeoServer
# @param zipfile [String] the ingestion zip being reported on
def report(total_indexed, failed_files, zipfile)
  if total_indexed.nil?
    logger.error('Solr indexing failed')
    logger.info("#{zipfile} - not imported")
    return
  end

  logger.info("#{zipfile} - all imported, total records: #{total_indexed}")
  return if failed_files.empty?

  logger.warn("#{zipfile} - some shapefile or GeoTIFF files not published to Geoservers")
  # Fixed grammar in the error message ("Failed to published" -> "Failed to publish").
  logger.error("Failed to publish geo files: #{failed_files.join('; ')}")
end
end
end
142 changes: 82 additions & 60 deletions lib/gingr/data_handler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,51 +20,97 @@ class << self
attr_accessor :spatial_root, :geoserver_root, :processing_root

def extract_and_move(zip_file)
extract_to_path = extract_zipfile(zip_file)
extract_to_path = perform_extraction(zip_file)
summary = prepare_publishing_files(extract_to_path)
geofile_name_hash = analyze_summary(summary)
{ extract_to_path:, geofile_name_hash: }
end

private

# Split a publishing summary into public vs UCB-restricted map filenames,
# dropping entries whose filename is missing (nil) or blank ('').
#
# @param summary [Array<Hash>] entries with :map_filename and :public_access keys
# @return [Hash] { public_files: [...], ucb_files: [...] }
def analyze_summary(summary)
  public_entries, ucb_entries = summary.partition { |entry| entry[:public_access] }
  usable_names = ->(entries) { entries.map { |entry| entry[:map_filename] }.compact.reject(&:empty?) }
  { public_files: usable_names.call(public_entries), ucb_files: usable_names.call(ucb_entries) }
end

# Extracting the ingestion zip file to the processing directory
# Unpack an ingestion zip into its working directory and return that directory.
def perform_extraction(zip_file)
  # Resolve (and clear) the destination first, then unpack the archive into it.
  destination = prepare_extract_to_path(zip_file)
  extract_zipfile(zip_file)
  destination
end

geofile_ingestion_dir_path = move_files(extract_to_path)
{ extract_to_path:, geofile_name_hash: get_geofile_name_hash(geofile_ingestion_dir_path) }
# Build the per-zip working directory path under @processing_root, clearing any
# leftover directory from a previous run, and return the path.
def prepare_extract_to_path(zip_file)
  target = File.join(@processing_root, File.basename(zip_file, '.*'))
  clr_directory(target)
  target
end

# Moving files to Geoserver and spatial server
def prepare_publishing_files(extract_to_path)
from_geofile_ingestion_path = File.join(extract_to_path, Config.geofile_ingestion_dirname)
subdirectory_list(from_geofile_ingestion_path).map { |dir| move_a_record(dir) }
rescue StandardError => e
logger.error "An error occurred while extracting and moving files from #{from_geofile_ingestion_path}: #{e.message}"
end

def extract_zipfile(zip_file, to_dir = @processing_root)
extracted_to_path = clr_subdirectory(zip_file)
Zip::File.open(zip_file) do |zip|
zip.each do |entry|
entry_path = File.join(to_dir, entry.name)
entry.extract(entry_path) { true }
entry.extract(destination_directory: to_dir) { true }
end
end
extracted_to_path
rescue StandardError => e
logger.error "An unexpected error occurred during unzip #{zip_file}: #{e.message}"
raise
end

# some records may not have a map.zip file
def move_a_record(dir_path)
attributes = record_attributes(dir_path)
arkid = File.basename(dir_path).strip
map_filename = nil
souredata_moved = false

def move_files(from_dir_path)
geofile_ingestion_dir_path = File.join(from_dir_path, Config.geofile_ingestion_dirname)
subdirectory_list(geofile_ingestion_dir_path).each do |subdirectory_path|
move_a_record(subdirectory_path)
subfile_list(dir_path).each do |file|
filename = File.basename(file)
map_filename = move_map_file(file, arkid, attributes) if filename == 'map.zip'
souredata_moved = move_source_file(file, arkid, attributes[:public_access]) if filename == 'data.zip'
end
geofile_ingestion_dir_path
logger.warning " '#{arkid} has no map.zip file, please check" if map_filename.nil?
logger.warning " '#{arkid} has no data.zip file, please check" unless souredata_moved
{ public_access: attributes[:public_access], map_filename: }
end

def move_a_record(dir_path)
subfile_list(dir_path).each do |file|
if File.basename(file) == 'map.zip'
dest_dir_path = file_path(dir_path, @geoserver_root)
unzip_map_files(dest_dir_path, file)
else
dest_dir_path = file_path(dir_path, @spatial_root)
mv_spatial_file(dest_dir_path, file)
end
end
# Unzip a record's map.zip into the GeoServer tree and return the geofile name
# GeoServer should publish, or '' when the move fails.
#
# NOTE(review): assumes attributes[:format] is non-nil — confirm upstream.
def move_map_file(file, arkid, attributes)
  destination = file_path(@geoserver_root, arkid, attributes[:public_access])
  unzip_map_files(destination, file)
  extension = attributes[:format].downcase == 'shapefile' ? '.shp' : '.tif'
  "#{arkid}#{extension}"
rescue StandardError => e
  logger.error "Failed to move map file '#{file}' for arkid '#{arkid}': #{e.message}"
  ''
end

# remove the subdirectory if it exists
def clr_subdirectory(zip_file)
subdir_name = File.basename(zip_file, '.*')
subdir_path = File.join(@processing_root, subdir_name)
FileUtils.rm_r(subdir_path) if File.directory? subdir_path
subdir_path
# Copy a record's data.zip into the spatial tree.
#
# @param file [Pathname, String] path to the record's data.zip
# @param arkid [String] the record's ark identifier
# @param public_access [Boolean] whether the record is publicly accessible
# @return [Boolean] true when the copy succeeded, false otherwise
def move_source_file(file, arkid, public_access)
  dest_dir_path = file_path(@spatial_root, arkid, public_access)
  mv_spatial_file(dest_dir_path, file)
  true
rescue StandardError => e
  logger.error "Failed to move sourcedata '#{file}' for '#{arkid}': #{e.message}"
  # Logger#error returns true; without an explicit false here the caller
  # would treat a failed move as success and skip its warning.
  false
end

def clr_directory(directory_name)
FileUtils.rm_r(directory_name) if File.directory? directory_name
rescue Errno::EACCES
logger.error("Permission denied: #{subdir_path}")
logger.error("Permission denied to clear #{directory_name}")
raise
end

Expand All @@ -76,37 +122,13 @@ def subfile_list(directory_path)
Pathname(directory_path).children.select(&:file?)
end

def get_geofile_name_hash(directory_path)
public_names = []
ucb_names = []
subdirectory_list(directory_path).each do |sub_dir|
hash = name_access_hash(sub_dir)
hash[:public_access] ? public_names << hash[:name] : ucb_names << hash[:name]
end
{ public: public_names, ucb: ucb_names }
end

def access_type(dir)
json_hash = geoblacklight_hash(dir)
value = json_hash['dct_accessRights_s'].downcase
value == 'public' ? 'public' : 'UCB'
end

private

def geoblacklight_hash(dir)
def record_attributes(dir)
json_filepath = File.join(dir, 'geoblacklight.json')
json_data = File.read(json_filepath)
JSON.parse(json_data)
end

def name_access_hash(dir)
basename = File.basename(dir).split('_').last
json_hash = geoblacklight_hash(dir)
json_hash = JSON.parse(json_data)
public_access = json_hash['dct_accessRights_s'].downcase == 'public'
format = json_hash['dct_format_s'].downcase
ext = format == 'shapefile' ? '.shp' : '.tif'
access_right = json_hash['dct_accessRights_s'].downcase
{ name: "#{basename}#{ext}", public_access: access_right == 'public' }
{ public_access:, format: }
end

def unzip_map_files(dest_dir, map_zipfile)
Expand All @@ -120,12 +142,12 @@ def mv_spatial_file(dest_dir, file)
FileUtils.cp(file, to_file)
end

def file_path(dir_path, root)
def file_path(root, arkid, public_access )
# geofiles/spatial/{UCB,public}/berkeley-{arkID}
arkid = File.basename(dir_path).strip
type = access_type(dir_path)
type = public_access ? 'public' : 'UCB'
File.join(root, type, "berkeley-#{arkid}")
end

end
end
end
39 changes: 24 additions & 15 deletions lib/gingr/geoserver_publisher.rb
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,24 @@ class GeoserverPublisher
DEFAULT_REMOTE_ROOT = '/srv/geofiles'
DEFAULT_WORKSPACE = 'UCB'

attr_reader :connection
attr_reader :remote_root
attr_reader :workspace_name
attr_reader :connection, :remote_root, :workspace_name

class << self
def publish_inventory(inventory, geoserver_url: nil, geoserver_secure_url: nil)
if !inventory[:public].empty?
public_files = inventory[:public_files]
ucb_files = inventory[:ucb_files]
un_published_shapefiles = []
un_published_geotiffs = []
unless public_files.empty?
public_publisher = new(geoserver_url)
public_publisher.batch_publish(inventory[:public])
un_published_shapefiles = public_publisher.batch_publish(public_files)
end

if !inventory[:ucb].empty?
unless ucb_files.empty?
secure_publisher = new(geoserver_secure_url, default: :geoserver_secure_url)
secure_publisher.batch_publish(inventory[:ucb])
un_published_geotiffs = secure_publisher.batch_publish(ucb_files)
end
(un_published_shapefiles + un_published_geotiffs).compact
end

def parse_connection_string(geoserver_baseurl)
Expand All @@ -39,7 +42,7 @@ def parse_connection_string(geoserver_baseurl)
port: uri.port == uri.default_port ? nil : uri.port,
path: uri.path,
fragment: uri.fragment,
query: uri.query,
query: uri.query
).to_s, uri.user, uri.password
end
end
Expand All @@ -63,17 +66,15 @@ def initialize(conn = nil, default: nil, remote_root: nil, workspace_name: nil)
end

def batch_publish(filenames)
filenames.each(&method(:publish))
filenames.map(&method(:publish))
end

def publish(filename)
id = File.basename(filename, '.*')
file_path = remote_filepath(id, filename)
if File.extname(filename).casecmp?('.shp')
publish_shapefile(file_path, id)
else
publish_geotiff(file_path, id)
end
return publish_shapefile(file_path, id) if File.extname(filename).casecmp?('.shp')

publish_geotiff(file_path, id)
end

def create_workspace
Expand All @@ -92,11 +93,19 @@ def create_workspace
# Publish a single shapefile layer to GeoServer.
# Returns nil on success, or the file_path on failure so callers can
# collect the set of files that did not publish.
def publish_shapefile(file_path, id)
  logger.debug("Publishing shapefile #{id} to #{geoserver_url}")
  Geoserver::Publish.shapefile(connection:, workspace_name:, file_path:, id:, title: id)
  nil
rescue StandardError => e
  # Swallow the error after logging: one bad layer should not abort the batch.
  logger.error("Error publishing shapefile #{file_path} to #{geoserver_url}: #{e.message}")
  file_path
end

# Publish a single GeoTIFF layer to GeoServer.
# Returns nil on success, or the file_path on failure so callers can
# collect the set of files that did not publish.
def publish_geotiff(file_path, id)
  logger.debug("Publishing geotiff #{id} to #{geoserver_url}")
  Geoserver::Publish.geotiff(connection:, workspace_name:, file_path:, id:, title: id)
  nil
rescue StandardError => e
  # Swallow the error after logging: one bad layer should not abort the batch.
  logger.error("Error publishing GeoTIFF #{file_path} to #{geoserver_url}: #{e.message}")
  file_path
end

def remote_filepath(id, filename)
Expand Down
Loading