ref #7313 creating vocabulary before terms
[cdm-vaadin.git] / src / main / java / eu / etaxonomy / cdm / dataInserter / RegistrationRequiredDataInserter.java
1 /**
2 * Copyright (C) 2017 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.dataInserter;
10
11 import java.io.IOException;
12 import java.util.ArrayList;
13 import java.util.Arrays;
14 import java.util.EnumSet;
15 import java.util.HashMap;
16 import java.util.HashSet;
17 import java.util.List;
18 import java.util.Map;
19 import java.util.Set;
20 import java.util.UUID;
21
22 import org.apache.log4j.Logger;
23 import org.joda.time.DateTime;
24 import org.joda.time.DateTimeFieldType;
25 import org.joda.time.Partial;
26 import org.joda.time.format.DateTimeFormatter;
27 import org.springframework.context.event.ContextRefreshedEvent;
28 import org.springframework.security.authentication.AuthenticationCredentialsNotFoundException;
29 import org.springframework.security.core.GrantedAuthority;
30 import org.springframework.transaction.TransactionStatus;
31 import org.springframework.transaction.annotation.Transactional;
32
33 import com.fasterxml.jackson.core.JsonParseException;
34 import com.fasterxml.jackson.databind.JsonMappingException;
35 import com.fasterxml.jackson.databind.ObjectMapper;
36
37 import eu.etaxonomy.cdm.api.application.AbstractDataInserter;
38 import eu.etaxonomy.cdm.api.application.CdmRepository;
39 import eu.etaxonomy.cdm.api.service.pager.Pager;
40 import eu.etaxonomy.cdm.model.agent.AgentBase;
41 import eu.etaxonomy.cdm.model.agent.Institution;
42 import eu.etaxonomy.cdm.model.common.DefinedTerm;
43 import eu.etaxonomy.cdm.model.common.DefinedTermBase;
44 import eu.etaxonomy.cdm.model.common.ExtensionType;
45 import eu.etaxonomy.cdm.model.common.GrantedAuthorityImpl;
46 import eu.etaxonomy.cdm.model.common.Group;
47 import eu.etaxonomy.cdm.model.common.TermVocabulary;
48 import eu.etaxonomy.cdm.model.common.TimePeriod;
49 import eu.etaxonomy.cdm.model.name.Registration;
50 import eu.etaxonomy.cdm.model.name.RegistrationStatus;
51 import eu.etaxonomy.cdm.model.name.TaxonName;
52 import eu.etaxonomy.cdm.model.name.TypeDesignationBase;
53 import eu.etaxonomy.cdm.model.reference.Reference;
54 import eu.etaxonomy.cdm.persistence.hibernate.permission.CRUD;
55 import eu.etaxonomy.cdm.persistence.hibernate.permission.CdmAuthority;
56 import eu.etaxonomy.cdm.persistence.hibernate.permission.CdmPermissionClass;
57 import eu.etaxonomy.cdm.persistence.hibernate.permission.Role;
58 import eu.etaxonomy.cdm.persistence.query.MatchMode;
59 import eu.etaxonomy.cdm.vaadin.model.registration.KindOfUnitTerms;
60 import eu.etaxonomy.cdm.vaadin.permission.RolesAndPermissions;
61
62 /**
63 *
64 * Can create missing registrations for names which have Extensions of the Type <code>IAPTRegdata.json</code>.
65 * See https://dev.e-taxonomy.eu/redmine/issues/6621 for further details.
66 * This feature can be activated by supplying one of the following JVM command line arguments:
67 * <ul>
68 * <li><code>-DregistrationCreate=iapt</code>: create all iapt Registrations if missing</li>
69 * <li><code>-DregistrationWipeout=iapt</code>: remove all iapt Registrations</li>
70 * <li><code>-DregistrationWipeout=all</code>: remove all Registrations</li>
71 * </ul>
72 * The <code>-DregistrationWipeout</code> commands are executed before the <code>-DregistrationCreate</code> and will not change the name and type designations.
73 *
74 * @author a.kohlbecker
75 * @since May 9, 2017
76 *
77 */
78 public class RegistrationRequiredDataInserter extends AbstractDataInserter {
79
// Name of the JVM system property which is read in executeSuppliedCommands() to trigger the creation of registrations.
80 protected static final String PARAM_NAME_CREATE = "registrationCreate";
81
// Name of the JVM system property which is read in executeSuppliedCommands() to trigger the removal of registrations.
82 protected static final String PARAM_NAME_WIPEOUT = "registrationWipeout";
83
// Uuid of the group named "Submitter" which is loaded or created in insertRequiredData().
84 protected static final UUID GROUP_SUBMITTER_UUID = UUID.fromString("c468c6a7-b96c-4206-849d-5a825f806d3e");
85
// Uuid of the group named "Curator" which is loaded or created in insertRequiredData().
86 protected static final UUID GROUP_CURATOR_UUID = UUID.fromString("135210d3-3db7-4a81-ab36-240444637d45");
87
// CRUD operation sets used to build the CdmAuthority strings for the groups above.
88 private static final EnumSet<CRUD> CREATE_READ = EnumSet.of(CRUD.CREATE, CRUD.READ);
89 private static final EnumSet<CRUD> CREATE_READ_UPDATE_DELETE = EnumSet.of(CRUD.CREATE, CRUD.READ, CRUD.UPDATE, CRUD.DELETE);
90
91 private static final Logger logger = Logger.getLogger(RegistrationRequiredDataInserter.class);
92
// Loaded on first use by getExtensionTypeIAPTRegData().
93 private ExtensionType extensionTypeIAPTRegData;
94
// Institutions already looked up or created by getInstitution(), keyed by the office string.
95 Map<String, Institution> instituteMap = new HashMap<>();
96
// Static guard: set to true by executeSuppliedCommands() so that the commands are executed only once.
97 public static boolean commandsExecuted = false;
98
// Set via setCdmRepository(); all service access in this class goes through it.
99 private CdmRepository repo;
100
// Per-instance guard: set to true at the end of onApplicationEvent() so that later events return immediately.
101 private boolean hasRun = false;
102
103 public void setCdmRepository(CdmRepository repo){
104 this.repo = repo;
105 }
106
107
108 // ==================== Registration creation ======================= //
109
110 /**
111 * {@inheritDoc}
112 */
113 @Override
114 public void onApplicationEvent(ContextRefreshedEvent event) {
115
116 if(hasRun){
117 return;
118 }
119
120 runAsAuthentication(Role.ROLE_ADMIN);
121
122 insertRequiredData();
123 executeSuppliedCommands();
124
125 restoreAuthentication();
126
127 hasRun = true;
128 }
129
130 /**
131 *
132 */
133 @Transactional
134 private void insertRequiredData() {
135
136 TransactionStatus txStatus = repo.startTransaction(false);
137
138 Role roleCuration = RolesAndPermissions.ROLE_CURATION;
139 if(repo.getGrantedAuthorityService().find(roleCuration.getUuid()) == null){
140 repo.getGrantedAuthorityService().saveOrUpdate(roleCuration.asNewGrantedAuthority());
141 }
142
143 Group groupCurator = repo.getGroupService().load(GROUP_CURATOR_UUID, Arrays.asList("grantedAuthorities"));
144 if(groupCurator == null){
145 groupCurator = Group.NewInstance();
146 groupCurator.setUuid(GROUP_CURATOR_UUID);
147 groupCurator.setName("Curator");
148 }
149 assureGroupHas(groupCurator, new CdmAuthority(CdmPermissionClass.REGISTRATION, CREATE_READ_UPDATE_DELETE).toString());
150 repo.getGroupService().saveOrUpdate(groupCurator);
151
152 Group groupSubmitter = repo.getGroupService().load(GROUP_SUBMITTER_UUID, Arrays.asList("grantedAuthorities"));
153 if(groupSubmitter == null){
154 groupSubmitter = Group.NewInstance();
155 groupSubmitter.setUuid(GROUP_SUBMITTER_UUID);
156 groupSubmitter.setName("Submitter");
157 }
158 assureGroupHas(groupSubmitter, new CdmAuthority(CdmPermissionClass.TAXONNAME, CREATE_READ).toString());
159 assureGroupHas(groupSubmitter, new CdmAuthority(CdmPermissionClass.TEAMORPERSONBASE, CREATE_READ).toString());
160 assureGroupHas(groupSubmitter, new CdmAuthority(CdmPermissionClass.REGISTRATION, CREATE_READ).toString());
161 assureGroupHas(groupSubmitter, new CdmAuthority(CdmPermissionClass.REFERENCE, CREATE_READ).toString());
162 assureGroupHas(groupSubmitter, new CdmAuthority(CdmPermissionClass.SPECIMENOROBSERVATIONBASE, CREATE_READ).toString());
163 assureGroupHas(groupSubmitter, new CdmAuthority(CdmPermissionClass.COLLECTION, CREATE_READ).toString());
164 repo.getGroupService().saveOrUpdate(groupSubmitter);
165
166 TermVocabulary<DefinedTerm> kindOfUnitVocabulary = repo.getVocabularyService().find(KindOfUnitTerms.KIND_OF_UNIT_VOCABULARY().getUuid());
167 if(repo.getVocabularyService().find(KindOfUnitTerms.KIND_OF_UNIT_VOCABULARY().getUuid()) == null){
168 kindOfUnitVocabulary = repo.getVocabularyService().save(KindOfUnitTerms.KIND_OF_UNIT_VOCABULARY());
169 }
170
171 DefinedTermBase kouSpecimen = repo.getTermService().find(KindOfUnitTerms.SPECIMEN().getUuid());
172 DefinedTermBase kouImage = repo.getTermService().find(KindOfUnitTerms.PUBLISHED_IMAGE().getUuid());
173 DefinedTermBase kouUnpublishedImage = repo.getTermService().find(KindOfUnitTerms.UNPUBLISHED_IMAGE().getUuid());
174 DefinedTermBase kouCulture = repo.getTermService().find(KindOfUnitTerms.CULTURE_METABOLIC_INACTIVE().getUuid());
175
176 if(kouSpecimen == null){
177 kouSpecimen = repo.getTermService().save(KindOfUnitTerms.SPECIMEN());
178 }
179 if(kouImage == null){
180 kouImage = repo.getTermService().save(KindOfUnitTerms.PUBLISHED_IMAGE());
181 }
182 if(kouUnpublishedImage == null){
183 kouUnpublishedImage = repo.getTermService().save(KindOfUnitTerms.UNPUBLISHED_IMAGE());
184 }
185 if(kouCulture == null){
186 kouCulture = repo.getTermService().save(KindOfUnitTerms.CULTURE_METABOLIC_INACTIVE());
187 }
188
189 Set<DefinedTerm> termInVocab = kindOfUnitVocabulary.getTerms();
190 List<DefinedTermBase> kouTerms = Arrays.asList(kouCulture, kouImage, kouSpecimen, kouUnpublishedImage);
191
192 for(DefinedTermBase t : kouTerms){
193 if(!termInVocab.contains(t)){
194 kindOfUnitVocabulary.addTerm((DefinedTerm)t);
195 }
196 }
197
198 repo.commitTransaction(txStatus);
199
200 }
201
202 private void assureGroupHas(Group group, String authorityString){
203 boolean authorityExists = false;
204
205 for(GrantedAuthority ga : group.getGrantedAuthorities()){
206 if((authorityExists = ga.getAuthority().equals(authorityString)) == true){
207 break;
208 }
209 }
210 if(!authorityExists){
211 group.addGrantedAuthority(findGrantedAuthority(authorityString));
212 }
213 }
214
215 private GrantedAuthorityImpl findGrantedAuthority(String authorityString){
216 GrantedAuthorityImpl ga = null;
217 try{
218 ga = repo.getGrantedAuthorityService().findAuthorityString(authorityString);
219 } catch (AuthenticationCredentialsNotFoundException e){
220 e.printStackTrace();
221 }
222 if(ga == null){
223 ga = GrantedAuthorityImpl.NewInstance(authorityString);
224 repo.getGrantedAuthorityService().save(ga);
225 }
226 return ga;
227 }
228
229 /**
230 *
231 */
232
233 private void executeSuppliedCommands() {
234
235 if(commandsExecuted){
236 // do not run twice
237 // a second run could take place during initialization of the web context
238 return;
239 }
240 commandsExecuted = true;
241
242 String wipeoutCmd = System.getProperty(PARAM_NAME_WIPEOUT);
243 String createCmd = System.getProperty(PARAM_NAME_CREATE);
244
245 // ============ DELETE
246 if(wipeoutCmd != null && wipeoutCmd.matches("iapt|all")){
247
248 boolean onlyIapt = wipeoutCmd.equals("iapt");
249 Set<UUID> deleteCandidates = new HashSet<UUID>();
250
251 TransactionStatus tx = repo.startTransaction(true);
252 List<Registration> allRegs = repo.getRegistrationService().list(null, null, null, null, null);
253 for(Registration reg : allRegs){
254 if(onlyIapt){
255 try {
256 @SuppressWarnings("unchecked")
257 Set<String> extensions = reg.getName().getExtensions(getExtensionTypeIAPTRegData());
258 if(reg.getUuid() != null){
259 deleteCandidates.add(reg.getUuid());
260 }
261 } catch(NullPointerException e){
262 // IGNORE
263 }
264 } else {
265 if(reg.getUuid() != null){
266 deleteCandidates.add(reg.getUuid());
267 }
268 }
269 }
270 repo.commitTransaction(tx);
271 if(!deleteCandidates.isEmpty()){
272 try {
273 repo.getRegistrationService().delete(deleteCandidates);
274 } catch (Exception e) {
275 // MySQLIntegrityConstraintViolationException happens here every second run !!!
276 logger.error(e);
277 }
278 }
279 }
280
281 // ============ CREATE
282 int pageIndex = 0;
283 if(createCmd != null && createCmd.equals("iapt")){
284
285 DateTimeFormatter dateFormat1 = org.joda.time.format.DateTimeFormat.forPattern("dd.MM.yy").withPivotYear(1950);
286 DateTimeFormatter dateFormat2 = org.joda.time.format.DateTimeFormat.forPattern("yyyy-MM-dd").withPivotYear(1950);
287
288 TransactionStatus tx = repo.startTransaction(false);
289 while(true) {
290 Pager<TaxonName> pager = repo.getNameService().page(null, 1000, pageIndex, null, null);
291 if(pager.getRecords().isEmpty()){
292 break;
293 }
294 List<Registration> newRegs = new ArrayList<>(pager.getRecords().size());
295 for(TaxonName name : pager.getRecords()){
296
297
298
299 Set<String> extensionValues = name.getExtensions(getExtensionTypeIAPTRegData());
300
301 // there is for sure only one
302 if(extensionValues.isEmpty()){
303 continue;
304 }
305
306 logger.debug("IAPT Registration for " + name.getTitleCache() + " ...");
307
308 String iaptJson = extensionValues.iterator().next();
309 try {
310
311 IAPTRegData iaptData = new ObjectMapper().readValue(iaptJson, IAPTRegData.class);
312
313 if(iaptData.getRegId() == null){
314 continue;
315 }
316
317 DateTime regDate = null;
318 if(iaptData.getDate() != null){
319 DateTimeFormatter dateFormat;
320 if(iaptData.getDate().matches("\\d{4}-\\d{2}-\\d{2}")){
321 dateFormat = dateFormat2;
322 } else {
323 dateFormat = dateFormat1;
324 }
325 try {
326 regDate = dateFormat.parseDateTime(iaptData.getDate());
327 regDate.getYear();
328 } catch (Exception e) {
329 logger.error("Error parsing date : " + iaptData.getDate(), e);
330 continue;
331 }
332 }
333
334 Registration reg = Registration.NewInstance();
335 reg.setStatus(RegistrationStatus.PUBLISHED);
336 reg.setIdentifier("http://phycobank.org/" + iaptData.getRegId());
337 reg.setSpecificIdentifier(iaptData.getRegId().toString());
338 reg.setInstitution(getInstitution(iaptData.getOffice()));
339
340 boolean isPhycobankID = Integer.valueOf(reg.getSpecificIdentifier()) >= 100000;
341
342 Partial youngestDate = null;
343 Reference youngestPub = null;
344
345 // find youngest publication
346
347 // NOTE:
348 // data imported from IAPT does not have typedesignation citations and sometimes no nomref
349
350 if(isPhycobankID){
351 youngestPub = name.getNomenclaturalReference();
352 youngestDate = partial(youngestPub.getDatePublished());
353
354 if(name.getTypeDesignations() != null && !name.getTypeDesignations().isEmpty()){
355 for(TypeDesignationBase<?> td : name.getTypeDesignations()){
356 if(td.getCitation() == null){
357 continue;
358 }
359 Partial pubdate = partial(td.getCitation().getDatePublished());
360 if(pubdate != null){
361 if(youngestDate== null || comparePartials(youngestDate, pubdate)){
362 youngestDate = pubdate;
363 youngestPub = td.getCitation();
364 }
365 }
366 }
367 }
368 }
369
370 if((isPhycobankID && youngestPub == name.getNomenclaturalReference()) || !isPhycobankID) {
371 reg.setName(name);
372 } else {
373 logger.debug("skipping name published in older referece");
374 }
375 if(name.getTypeDesignations() != null && !name.getTypeDesignations().isEmpty()){
376 // do not add the collection directly to avoid "Found shared references to a collection" problem
377 Set<TypeDesignationBase> typeDesignations = new HashSet<>(name.getTypeDesignations().size());
378 for(TypeDesignationBase<?> td : name.getTypeDesignations()){
379 if(td.getCitation() == null && isPhycobankID){
380 logger.error("Missing TypeDesignation Citation in Phycobank data");
381 continue;
382 }
383 if((isPhycobankID && youngestPub == td.getCitation()) || !isPhycobankID){
384 typeDesignations.add(td);
385 } else {
386 logger.debug("skipping typedesignation published in older reference");
387 }
388 }
389 reg.setTypeDesignations(typeDesignations);
390 }
391 reg.setRegistrationDate(regDate);
392 newRegs.add(reg);
393
394 } catch (JsonParseException e) {
395 logger.error("Error parsing IAPTRegData from extension", e);
396 } catch (JsonMappingException e) {
397 logger.error("Error mapping json from extension to IAPTRegData", e);
398 } catch (IOException e) {
399 logger.error(e);
400 }
401
402 }
403 repo.getRegistrationService().save(newRegs);
404 tx.flush();
405 logger.debug("Registrations saved");
406 pageIndex++;
407 }
408 repo.commitTransaction(tx);
409 }
410 }
411
412
413 /**
414 * @param youngestDate
415 * @param pubdate
416 * @return
417 */
418 protected boolean comparePartials(Partial youngestDate, Partial pubdate) {
419
420 if(youngestDate.size() == pubdate.size()) {
421 return youngestDate.compareTo(pubdate) < 0;
422 }
423 youngestDate = youngestDate.without(DateTimeFieldType.dayOfMonth());
424 pubdate = pubdate.without(DateTimeFieldType.dayOfMonth());
425 if(youngestDate.size() == pubdate.size()) {
426 return youngestDate.compareTo(pubdate) < 0;
427 }
428 youngestDate = youngestDate.without(DateTimeFieldType.monthOfYear());
429 pubdate = pubdate.without(DateTimeFieldType.monthOfYear());
430 return youngestDate.compareTo(pubdate) < 0;
431
432 }
433
434
435 /**
436 * @param datePublished
437 * @return
438 */
439 private Partial partial(TimePeriod datePublished) {
440 if(datePublished != null){
441 if(datePublished.getEnd() != null){
442 return datePublished.getEnd();
443 } else {
444 return datePublished.getStart();
445 }
446 }
447 return null;
448 }
449
450
451 /**
452 * @param office
453 * @return
454 */
455 private Institution getInstitution(String office) {
456 Institution institution;
457 if(instituteMap.containsKey(office)){
458 institution = instituteMap.get(office);
459 } else {
460
461 Pager<AgentBase> pager = repo.getAgentService().findByTitle(Institution.class, office, MatchMode.EXACT, null, null, null, null, null);
462 if(!pager.getRecords().isEmpty()){
463 institution = (Institution) pager.getRecords().get(0);
464 } else {
465 Institution institute = (Institution) repo.getAgentService().save(Institution.NewNamedInstance(office));
466 institution = institute;
467 }
468 instituteMap.put(office, institution);
469 }
470 return institution;
471 }
472
473
474 private ExtensionType getExtensionTypeIAPTRegData() {
475 if(extensionTypeIAPTRegData == null){
476 extensionTypeIAPTRegData = (ExtensionType) repo.getTermService().load(UUID.fromString("9be1bfe3-6ba0-4560-af15-86971ab96e09"));
477 }
478 return extensionTypeIAPTRegData;
479 }
480
481
482
483 }