-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlocation_request.rb
More file actions
284 lines (223 loc) · 8.41 KB
/
Copy pathlocation_request.rb
File metadata and controls
284 lines (223 loc) · 8.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# rubocop:disable Metrics/ClassLength
# TODO: Move some code out of this class
# A request to look up location information for the OCLC numbers listed in
# an uploaded spreadsheet (+input_file+). Each OCLC number read from the
# spreadsheet becomes a LocationRecord, and the collected results are written
# to a second spreadsheet (+output_file+).
class LocationRequest < ActiveRecord::Base
# NOTE(review): presumably this mixin is what brings XLSXWriter and
# LocationResult (referenced unqualified below) into scope -- confirm.
include BerkeleyLibrary::Location
# ------------------------------------------------------------
# Constants
# Number of rows passed to each LocationRecord.insert_all call
BATCH_SIZE = 10_000
# Largest number of OCLC numbers accepted from a single input spreadsheet
MAX_OCLC_NUMBERS = 50_000
# LocationRecord columns plucked when writing results, in the same order as
# the positional parameters of #new_result
RESULT_ARGS = %i[oclc_number wc_symbols wc_error ht_record_url ht_error].freeze
# Error message raised when the input spreadsheet yields no OCLC numbers
MSG_NO_OCLC_NUMBERS = 'No OCLC numbers found in input spreadsheet'.freeze
# NOTE(review): not referenced anywhere in this file -- presumably a display
# limit used by views or other callers; confirm before changing.
MAX_ERRORS_TO_DISPLAY = 100
# ------------------------------------------------------------
# Relations
# The uploaded spreadsheet that OCLC numbers are read from
has_one_attached :input_file
# The generated results spreadsheet (attached by #write_output_file!)
has_one_attached :output_file
# One record per OCLC number, created in bulk by #ensure_location_records!
has_many :location_records, dependent: :delete_all
# ------------------------------------------------------------
# Validations
# An email address and a filename are both required
validates :email, presence: true
validates :filename, presence: true
# At least one lookup option must be selected (see #options_selected)
validate :options_selected
# ------------------------------------------------------------
# Class methods
class << self
# Creates a new LocationRequest, validates the attached input file, and
# creates a location record for each OCLC number.
#
# The request is returned even when it has errors; in that case the
# transaction is rolled back so that none of its LocationRecords are kept.
#
# @param options [Hash] keyword arguments passed on to create_with_records
# @return [LocationRequest] the request; inspect its errors to see whether it was kept
# @raise [StandardError] re-raises any error, after cleaning up a request
#   that had already been persisted
def create_from(**options)
  # Declared outside the transaction block so that it survives a rollback
  # and is visible to the rescue clause below.
  created = nil
  transaction(requires_new: true) do
    created = create_with_records(**options)
    raise ActiveRecord::Rollback if created.errors.any?
  end
  created
rescue StandardError
  # ActiveStorage doesn't try to upload the files until after the
  # transaction commits, so if that fails for some reason the record
  # has to be deleted manually.
  clean_up(created) if created&.persisted?
  raise
end
# @return [String] MAX_OCLC_NUMBERS rendered with the +:delimited+ number format
def max_oclc_numbers = MAX_OCLC_NUMBERS.to_fs(:delimited)
private
# Destroys the request and purges the blob of its input file, if one is
# attached.
#
# @param request [LocationRequest] the request
# @return the result of +request.destroy+
def clean_up(request)
  attachment = request.input_file
  # Capture the blob before destroying the request, so that it can
  # still be purged afterwards.
  blob = attachment.blob if attachment.attached?
  request.destroy
ensure
  # If we were using S3 or whatever, this would be expensive
  # and we should use purge_later, but since we're using
  # local disk, let's just take care of it right now
  blob&.purge
end
# Creates the request, then creates its location records and checks that
# the user is allowed to make it. Problems found by those two steps are
# recorded as errors on the returned request.
#
# @param input_file [Object, nil] the uploaded file, or an attachable hash
# @param user [Object, nil] the user making the request
# @param options [Hash] remaining attributes, passed to +create+
# @return [LocationRequest] the request
def create_with_records(input_file: nil, user: nil, **options)
  request = create(filename: filename_from(input_file), input_file:, **options)
  ensure_location_records(request, input_file:)
  ensure_admin_if_immediate(request, user:)
  request
end
# Works out the original filename of an input file.
#
# input_file can be an UploadedFile, or a hash -- see
# https://guides.rubyonrails.org/active_storage_overview.html#attaching-file-io-objects
#
# @param input_file [Object, nil] the input file
# @return [String, nil] the filename, or nil if it can't be determined
def filename_from(input_file)
  if input_file.respond_to?(:original_filename)
    input_file.original_filename
  elsif input_file.is_a?(Hash)
    input_file[:filename]
  end
end
# Replaces the input file's path in an error message with the name the file
# was uploaded under.
#
# @param msg [String] the original error message
# @param input_file [Object] the input file; left alone unless it responds to +path+
# @return [String] the message with every occurrence of the path replaced,
#   or the unchanged message if there is no path to replace
def clean_input_file_message(msg, input_file)
  return msg unless input_file.respond_to?(:path)

  path = input_file.path.to_s
  return msg if path.empty?

  # Fall back to the basename so that a missing filename can't turn
  # error reporting into a TypeError.
  display_name = filename_from(input_file) || File.basename(path)

  # Block form: the replacement is inserted literally. With a replacement
  # string, backslash sequences in the filename (e.g. "\1") would be
  # interpreted as back-references.
  msg.gsub(path) { display_name }
end
# Creates the location records for a persisted request. A failure is logged
# and recorded as an error on the request's +input_file+ rather than raised.
#
# @param request [LocationRequest] the request
# @param input_file [Object, nil] the input file, used to tidy the error message
def ensure_location_records(request, input_file:)
  return unless request.persisted?

  request.ensure_location_records!
rescue StandardError => e
  logger.error("Error creating location records from input file #{request.filename}", e)
  user_message = clean_input_file_message(e.message, input_file)
  request.errors.add(:input_file, user_message)
end
# Records an error on +immediate+ when an immediate request is made by
# anyone other than a user whose +framework_admin+ is truthy.
#
# @param request [LocationRequest] the request
# @param user [Object, nil] the user making the request
def ensure_admin_if_immediate(request, user:)
  return if !request.immediate? || user&.framework_admin

  logger.error("Can't trigger immediate location request unless logged in as Framework admin", user:)
  # NOTE(review): the key is under `location_task`, not `location_request` --
  # confirm that it exists in the locale files.
  forbidden = I18n.t('activerecord.errors.models.location_task.attributes.immediate.forbidden')
  request.errors.add(:immediate, forbidden)
end
end
# ------------------------------------------------------------
# Synthetic accessors
# @return [Boolean] whether a WorldCat lookup (SLF or UC) is selected
def world_cat? = slf? || uc?
# @return [Boolean] whether any selected lookup still has records to retrieve
def incomplete? = wc_incomplete? || hathi_incomplete?
# @return [Boolean] true if the HathiTrust lookup is selected and at least
#   one location record has +ht_retrieved+ set to false
def hathi_incomplete?
  return false unless hathi?

  location_records.exists?(ht_retrieved: false)
end
# @return [Boolean] true if a WorldCat lookup is selected and at least one
#   location record has +wc_retrieved+ set to false
def wc_incomplete?
  return false unless world_cat?

  location_records.exists?(wc_retrieved: false)
end
# @return [String] the input filename with its last extension replaced by
#   +-processed.xlsx+
def output_filename
  stem = File.basename(filename, '.*')
  "#{stem}-processed.xlsx"
end
# @return [Integer] the number of location records belonging to this request
def record_count = location_records.count
# The location records for which every selected lookup has been retrieved.
#
# @return [ActiveRecord::Relation] records with +ht_retrieved+ and/or
#   +wc_retrieved+ true, depending on which lookups are selected; with no
#   lookup selected, all location records
def completed_records
  conditions = {}
  conditions[:ht_retrieved] = true if hathi?
  conditions[:wc_retrieved] = true if world_cat?

  # Pass the hash positionally. When no lookup is selected it is empty, and
  # `where(**{})` is a bare `where`, which returns a WhereChain rather than
  # a relation and breaks callers such as #completed_count.
  location_records.where(conditions)
end
# @return [Integer] the number of records returned by #completed_records
def completed_count = completed_records.count
# @return [Integer] the number of records returned by #records_with_errors
def error_count = records_with_errors.count
# The location records that have an error for at least one selected lookup.
#
# @return [ActiveRecord::Relation] records with a non-nil +ht_error+ and/or
#   +wc_error+, depending on which lookups are selected; an empty relation
#   when no lookup is selected
def records_with_errors
  scopes = []
  scopes << location_records.where.not(ht_error: nil) if hathi?
  scopes << location_records.where.not(wc_error: nil) if world_cat?

  # With no lookup selected there is nothing to combine; return an empty
  # relation instead of nil so that callers such as #error_count keep working.
  return location_records.none if scopes.empty?

  scopes.inject { |combined, scope| combined.or(scope) }
end
# @return [Object, nil] the smallest +updated_at+ among the completed
#   records, or nil if there are none
def first_completed_at
  oldest_first = completed_records.order(updated_at: :asc)
  oldest_first.limit(1).pick(:updated_at)
end
# @return [Object, nil] the largest +updated_at+ among the completed
#   records, or nil if there are none
def last_completed_at
  newest_first = completed_records.order(updated_at: :desc)
  newest_first.limit(1).pick(:updated_at)
end
# ------------------------------------------------------------
# Public instance methods
# Creates one LocationRecord for each OCLC number in the input file.
#
# @raise [ArgumentError] if the input file contains no OCLC numbers, or
#   more than MAX_OCLC_NUMBERS of them
def ensure_location_records!
  rows = each_input_oclc.map do |oclc_number|
    { location_request_id: id, oclc_number: }
  end
  raise ArgumentError, MSG_NO_OCLC_NUMBERS if rows.empty?

  # Refuse oversized spreadsheets before anything is written
  if rows.size > MAX_OCLC_NUMBERS
    raise ArgumentError, I18n.t('location_request.errors.max_oclc_numbers', max: LocationRequest.max_oclc_numbers)
  end

  # Insert in batches to prevent DB connection timeout on very large datasets
  rows.each_slice(BATCH_SIZE) do |batch|
    LocationRecord.insert_all(batch) # rubocop:disable Rails/SkipsModelValidations
  end
end
# Reads the input file and passes each OCLC number to the given block.
#
# @yield each OCLC number produced by the spreadsheet reader
# @return whatever the reader's +each_oclc_number+ returns.
#   NOTE(review): presumably an Enumerator when no block is given, since
#   #ensure_location_records! calls +map+ on the result -- confirm.
def each_input_oclc(&block)
  # A named block parameter is used on purpose: Ruby 3.3.0 rejects
  # forwarding an anonymous `&` from inside a block.
  with_input_tmpfile do |tmpfile|
    BerkeleyLibrary::Location::XLSXReader.new(tmpfile.path).each_oclc_number(&block)
  end
end
# Yields the input file to the block: opened from storage if it has already
# been uploaded there, otherwise the newly uploaded file that is still
# pending (see #with_uploaded_input_file).
#
# @return the value returned by whichever of the two calls was made
def with_input_tmpfile(&block)
  if input_file_uploaded?
    input_file.open(&block)
  else
    with_uploaded_input_file(&block)
  end
end
# The WorldCat symbols to search for, based on the selected lookups.
#
# @return [Array, nil] a new array holding the SLF symbols (if +slf?+)
#   followed by the UC symbols (if +uc?+), or nil if no WorldCat lookup is
#   selected
def search_wc_symbols
  return unless world_cat?

  symbols = []
  symbols.concat(BerkeleyLibrary::Location::WorldCat::Symbols::SLF) if slf?
  symbols.concat(BerkeleyLibrary::Location::WorldCat::Symbols::UC) if uc?
  symbols
end
# @return [Boolean] whether an input file is attached and its key exists in
#   the attachment's storage service
def input_file_uploaded?
  return false unless input_file.attached?

  input_file.service.exist?(input_file.key)
end
# @return [Boolean] whether an output file is attached and its key exists in
#   the attachment's storage service
def output_file_uploaded?
  return false unless output_file.attached?

  output_file.service.exist?(output_file.key)
end
# Writes the output file unless one has already been uploaded.
#
# @return the result of #write_output_file!, or nil if nothing was written
def ensure_output_file!
  write_output_file! unless output_file_uploaded?
end
# ------------------------------------------------------------
# Private methods
private
# Hack that lets us work with a newly uploaded input file before it's
# been "uploaded" to ActiveStorage::Service::DiskService
#
# @yield the IO of the pending input-file attachment
# @return the block's result, or nil if no input file is pending
def with_uploaded_input_file
  attachable = attachment_changes['input_file']&.attachable
  return unless attachable

  # input_file can be an UploadedFile, or a hash -- see
  # https://guides.rubyonrails.org/active_storage_overview.html#attaching-file-io-objects
  io = attachable.is_a?(Hash) ? attachable[:io] : attachable
  begin
    yield io
  ensure
    # Rewind after the block has run, even if it raised
    io.rewind
  end
end
# Validation: adds a base error unless at least one lookup option is selected.
def options_selected
  nothing_selected = !world_cat? && !hathi?
  errors.add(:base, 'At least one of SLF, Other UC, or HathiTrust must be selected') if nothing_selected
end
# Builds a LocationResult from one row of plucked values, whose order is
# given by RESULT_ARGS.
#
# @param oclc_number [Object] the OCLC number
# @param wc_sym_str [String, nil] comma-separated WorldCat symbols
# @param wc_error [Object, nil] the WorldCat error, if any
# @param ht_record_url [Object, nil] the HathiTrust record URL, if any
# @param ht_error [Object, nil] the HathiTrust error, if any
# @return [LocationResult] the result
def new_result(oclc_number, wc_sym_str, wc_error, ht_record_url, ht_error)
  wc_symbols = wc_sym_str&.split(',') || []
  LocationResult.new(oclc_number, wc_symbols:, wc_error:, ht_record_url:, ht_error:)
end
# Writes the results into the input spreadsheet and attaches the outcome as
# the output file.
#
# @return the result of +output_file.attach+
def write_output_file!
  spreadsheet = input_spreadsheet
  write_results_to(spreadsheet)

  output_file.attach(
    io: spreadsheet.stream,
    filename: output_filename,
    content_type: BerkeleyLibrary::Util::XLSX::Spreadsheet::MIME_TYPE_OOXML_WB,
    identify: false
  )
end
# Appends one result per location record to the given spreadsheet.
#
# @param ss [Object] the spreadsheet handed to the XLSXWriter
# @return [Array] the plucked rows, one per location record
def write_results_to(ss)
  writer = XLSXWriter.new(ss, slf:, uc:, hathi_trust: hathi)
  location_records.pluck(*RESULT_ARGS).each do |fields|
    writer << new_result(*fields)
  end
end
# Opens the input file as a spreadsheet.
#
# @return the Spreadsheet built from the input file's path, or whatever
#   #with_input_tmpfile returns if it does not yield
def input_spreadsheet
  with_input_tmpfile do |tmpfile|
    path = tmpfile.path
    BerkeleyLibrary::Util::XLSX::Spreadsheet.new(path)
  end
end
end
# rubocop:enable Metrics/ClassLength