ref #9664 change nomTitle to nomTitleCache in cdmlib-app
[cdmlib-apps.git] / app-import / src / main / java / eu / etaxonomy / cdm / io / berlinModel / in / BerlinModelReferenceImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9
10 package eu.etaxonomy.cdm.io.berlinModel.in;
11
12 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_ARTICLE;
13 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_BOOK;
14 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_CONFERENCE_PROCEEDINGS;
15 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_DATABASE;
16 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_INFORMAL;
17 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_JOURNAL;
18 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_JOURNAL_VOLUME;
19 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_PART_OF_OTHER_TITLE;
20 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_PRINT_SERIES;
21 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_UNKNOWN;
22 import static eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer.REF_WEBSITE;
23 import static eu.etaxonomy.cdm.io.common.ImportHelper.NO_OVERWRITE;
24 import static eu.etaxonomy.cdm.io.common.ImportHelper.OBLIGATORY;
25 import static eu.etaxonomy.cdm.io.common.ImportHelper.OVERWRITE;
26
27 import java.net.URISyntaxException;
28 import java.sql.ResultSet;
29 import java.sql.SQLException;
30 import java.util.ArrayList;
31 import java.util.Arrays;
32 import java.util.HashMap;
33 import java.util.HashSet;
34 import java.util.List;
35 import java.util.Map;
36 import java.util.Set;
37 import java.util.UUID;
38 import java.util.regex.Matcher;
39 import java.util.regex.Pattern;
40
41 import org.apache.log4j.Logger;
42 import org.springframework.stereotype.Component;
43
44 import eu.etaxonomy.cdm.common.CdmUtils;
45 import eu.etaxonomy.cdm.common.DOI;
46 import eu.etaxonomy.cdm.common.URI;
47 import eu.etaxonomy.cdm.io.berlinModel.BerlinModelTransformer;
48 import eu.etaxonomy.cdm.io.berlinModel.in.validation.BerlinModelReferenceImportValidator;
49 import eu.etaxonomy.cdm.io.common.ICdmImport;
50 import eu.etaxonomy.cdm.io.common.IImportConfigurator;
51 import eu.etaxonomy.cdm.io.common.ImportHelper;
52 import eu.etaxonomy.cdm.io.common.ResultSetPartitioner;
53 import eu.etaxonomy.cdm.io.common.Source;
54 import eu.etaxonomy.cdm.io.common.mapping.CdmAttributeMapperBase;
55 import eu.etaxonomy.cdm.io.common.mapping.CdmIoMapping;
56 import eu.etaxonomy.cdm.io.common.mapping.CdmSingleAttributeMapperBase;
57 import eu.etaxonomy.cdm.io.common.mapping.DbImportExtensionMapper;
58 import eu.etaxonomy.cdm.io.common.mapping.DbImportMarkerMapper;
59 import eu.etaxonomy.cdm.io.common.mapping.DbSingleAttributeImportMapperBase;
60 import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmOneToManyMapper;
61 import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmStringMapper;
62 import eu.etaxonomy.cdm.io.common.mapping.berlinModel.CdmUriMapper;
63 import eu.etaxonomy.cdm.io.common.utils.ImportDeduplicationHelper;
64 import eu.etaxonomy.cdm.model.agent.Person;
65 import eu.etaxonomy.cdm.model.agent.Team;
66 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
67 import eu.etaxonomy.cdm.model.common.CdmBase;
68 import eu.etaxonomy.cdm.model.common.ExtensionType;
69 import eu.etaxonomy.cdm.model.common.IdentifiableSource;
70 import eu.etaxonomy.cdm.model.common.Identifier;
71 import eu.etaxonomy.cdm.model.common.Marker;
72 import eu.etaxonomy.cdm.model.common.MarkerType;
73 import eu.etaxonomy.cdm.model.reference.IArticle;
74 import eu.etaxonomy.cdm.model.reference.IBookSection;
75 import eu.etaxonomy.cdm.model.reference.IPrintSeries;
76 import eu.etaxonomy.cdm.model.reference.Reference;
77 import eu.etaxonomy.cdm.model.reference.ReferenceFactory;
78 import eu.etaxonomy.cdm.model.reference.ReferenceType;
79 import eu.etaxonomy.cdm.model.term.DefinedTerm;
80 import eu.etaxonomy.cdm.model.term.TermVocabulary;
81 import eu.etaxonomy.cdm.strategy.cache.agent.PersonDefaultCacheStrategy;
82 import eu.etaxonomy.cdm.strategy.cache.agent.TeamDefaultCacheStrategy;
83
84 /**
85 * @author a.mueller
86 * @since 20.03.2008
87 */
88 @Component
89 public class BerlinModelReferenceImport extends BerlinModelImportBase {
90 private static final long serialVersionUID = -3667566958769967591L;
91
92 private static final Logger logger = Logger.getLogger(BerlinModelReferenceImport.class);
93
94 public static final String REFERENCE_NAMESPACE = "Reference";
95 private static final String REF_AUTHOR_NAMESPACE = "Reference.refAuthorString";
96
97
98 public static final UUID REF_DEPOSITED_AT_UUID = UUID.fromString("23ca88c7-ce73-41b2-8ca3-2cb22f013beb");
99 public static final UUID REF_SOURCE_UUID = UUID.fromString("d6432582-2216-4b08-b0db-76f6c1013141");
100 public static final UUID DATE_STRING_UUID = UUID.fromString("e4130eae-606e-4b0c-be4f-e93dc161be7d");
101 public static final UUID IS_PAPER_UUID = UUID.fromString("8a326129-d0d0-4f9d-bbdf-8d86b037c65e");
102
103 private static ImportDeduplicationHelper<BerlinModelImportState> deduplicationHelper;
104
105 private final int modCount = 1000;
106 private static final String pluralString = "references";
107 private static final String dbTableName = "reference";
108
109
/**
 * Creates the import, handing the table name and the plural label to the base class.
 */
public BerlinModelReferenceImport(){
    super(dbTableName, pluralString);
}
113
114 protected void initializeMappers(BerlinModelImportState state){
115 for (CdmAttributeMapperBase mapper: classMappers){
116 if (mapper instanceof DbSingleAttributeImportMapperBase){
117 @SuppressWarnings("unchecked")
118 DbSingleAttributeImportMapperBase<BerlinModelImportState,Reference> singleMapper =
119 (DbSingleAttributeImportMapperBase<BerlinModelImportState,Reference>)mapper;
120 singleMapper.initialize(state, Reference.class);
121 }
122 }
123 return;
124 }
125
126 private Set<Integer> commonNameRefSet = null;
127 private void initializeCommonNameRefMap(BerlinModelImportState state) throws SQLException{
128 if (state.getConfig().isEuroMed()){
129 commonNameRefSet = new HashSet<>();
130 String queryStr = "SELECT DISTINCT RefFk "
131 + " FROM emCommonName ";
132 ResultSet rs = state.getConfig().getSource().getResultSet(queryStr);
133 while (rs.next()){
134 commonNameRefSet.add(rs.getInt("RefFk"));
135 }
136 }
137 }
138
139 protected static CdmAttributeMapperBase[] classMappers = new CdmAttributeMapperBase[]{
140 new CdmStringMapper("edition", "edition"),
141 new CdmStringMapper("volume", "volume"),
142 new CdmStringMapper("publisher", "publisher"),
143 new CdmStringMapper("publicationTown", "placePublished"),
144 new CdmStringMapper("isbn", "isbn"),
145 new CdmStringMapper("isbn", "isbn"),
146 new CdmStringMapper("pageString", "pages"),
147 new CdmStringMapper("series", "seriesPart"),
148 new CdmStringMapper("issn", "issn"),
149 new CdmUriMapper("url", "uri"),
150 DbImportExtensionMapper.NewInstance("NomStandard", ExtensionType.NOMENCLATURAL_STANDARD()),
151 DbImportExtensionMapper.NewInstance("DateString", DATE_STRING_UUID, "Date String", "Date String", "dates"),
152 DbImportExtensionMapper.NewInstance("RefDepositedAt", REF_DEPOSITED_AT_UUID, "Ref. deposited at", "Reference is deposited at", "at"),
153 DbImportExtensionMapper.NewInstance("RefSource", REF_SOURCE_UUID, "RefSource", "Reference Source", "source"),
154 DbImportMarkerMapper.NewInstance("isPaper", IS_PAPER_UUID, "is paper", "is paper", "paper", false)
155 //not yet supported by model
156 ,new CdmStringMapper("refAuthorString", "refAuthorString"),
157 };
158
159
160 protected static String[] operationalAttributes = new String[]{
161 "refId", "refCache", "nomRefCache", "preliminaryFlag", "inRefFk", "title", "nomTitleAbbrev",
162 "refAuthorString", "nomAuthorTeamFk",
163 "refCategoryFk", "thesisFlag", "informalRefCategory", "idInSource"
164 };
165
166 protected static String[] createdAndNotesAttributes = new String[]{
167 "created_When", "updated_When", "created_Who", "updated_Who", "notes"
168 };
169
170 protected static String[] unclearMappers = new String[]{
171 /*"isPaper",*/ "exportDate",
172 };
173
174 //TODO isPaper
175 //
176
177
178
179 //type to count the references nomReferences that have been created and saved
180 private class RefCounter{
181 RefCounter() {refCount = 0;}
182 int refCount;
183 int dedupCount;
184
185 @Override
186 public String toString(){return String.valueOf(refCount) + "/" + String.valueOf(dedupCount) ;}
187 }
188
189 @Override
190 protected String getRecordQuery(BerlinModelImportConfigurator config) {
191 return null; //not needed
192 }
193
194 @Override
195 protected void doInvoke(BerlinModelImportState state){
196 logger.info("start make " + getPluralString() + " ...");
197 deduplicationHelper = ImportDeduplicationHelper.NewInstance(this, state);
198
199 boolean success = true;
200 initializeMappers(state);
201 try {
202 initializeCommonNameRefMap(state);
203 } catch (SQLException e1) {
204 e1.printStackTrace();
205 logger.error("Error in initializeCommonNameRefMap in BerlinModelReferenceimport");
206 }
207 BerlinModelImportConfigurator config = state.getConfig();
208 Source source = config.getSource();
209
210 String strSelectId = " SELECT Reference.RefId as refId ";
211 String strSelectFull =
212 " SELECT Reference.* ,InReference.RefCategoryFk as InRefCategoryFk, RefSource.RefSource " ;
213 String strFrom =
214 " FROM %s " +
215 " LEFT OUTER JOIN Reference as InReference ON InReference.refId = Reference.inRefFk " +
216 " LEFT OUTER JOIN RefSource ON Reference.RefSourceFk = RefSource.RefSourceId " +
217 " WHERE (1=1) ";
218 String strOrderBy = " ORDER BY InReference.inRefFk, Reference.inRefFk "; //to make in-references available in first run
219 String strWherePartitioned = " AND (Reference.refId IN ("+ ID_LIST_TOKEN + ") ) ";
220
221 String referenceTable = CdmUtils.Nz(state.getConfig().getReferenceIdTable());
222 referenceTable = referenceTable.isEmpty() ? " Reference" : referenceTable + " as Reference ";
223 String strIdFrom = String.format(strFrom, referenceTable );
224
225 String referenceFilter = CdmUtils.Nz(state.getConfig().getReferenceIdTable());
226 if (! referenceFilter.isEmpty()){
227 referenceFilter = " AND " + referenceFilter + " ";
228 }
229 referenceFilter = ""; //don't use it for now, in E+M the tabelle is directly used
230
231 String strIdQueryFirstPath = strSelectId + strIdFrom + strOrderBy ;
232 String strIdQuerySecondPath = strSelectId + strIdFrom + " AND (Reference.InRefFk is NOT NULL) ";
233
234 // if (config.getDoReferences() == CONCEPT_REFERENCES){
235 // strIdQueryNoInRef += " AND ( Reference.refId IN ( SELECT ptRefFk FROM PTaxon) ) " + referenceFilter;
236 // }
237
238 String strRecordQuery = strSelectFull + String.format(strFrom, " Reference ") + strWherePartitioned + strOrderBy;
239
240 int recordsPerTransaction = config.getRecordsPerTransaction();
241 try{
242 //firstPath
243 ResultSetPartitioner<BerlinModelImportState> partitioner =
244 ResultSetPartitioner.NewInstance(source, strIdQueryFirstPath, strRecordQuery, recordsPerTransaction);
245 while (partitioner.nextPartition()){
246 partitioner.doPartition(this, state);
247 }
248 logger.info("end make references without in-references ... " + getSuccessString(success));
249 state.setReferenceSecondPath(true);
250
251 //secondPath
252 // partitioner = ResultSetPartitioner.NewInstance(source, strIdQuerySecondPath, strRecordQuery, recordsPerTransaction);
253 // while (partitioner.nextPartition()){
254 // //currently not used as inRef assignment fully works through sorting of idQuery now, at least in E+M
255 // partitioner.doPartition(this, state);
256 // }
257 // logger.info("end make references with no 1 in-reference ... " + getSuccessString(success));
258 state.setReferenceSecondPath(false);
259 logger.warn("Parsed book volumes: " + parsedBookVolumes);
260 } catch (SQLException e) {
261 logger.error("SQLException:" + e);
262 state.setUnsuccessfull();
263 return;
264 }
265 logger.info("end make " + getPluralString() + " ... " + getSuccessString(success));
266 if (! success){
267 state.setUnsuccessfull();
268 }
269 deduplicationHelper = null;
270 return;
271 }
272
273
274 @Override
275 public boolean doPartition(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
276 deduplicationHelper.restartSession();
277
278 if (state.isReferenceSecondPath()){
279 return doPartitionSecondPath(partitioner, state);
280 }
281 boolean success = true;
282
283 Map<Integer, Reference> refToSave = new HashMap<>();
284
285 BerlinModelImportConfigurator config = state.getConfig();
286
287 try {
288
289 int i = 0;
290 RefCounter refCounter = new RefCounter();
291 ResultSet rs = partitioner.getResultSet();
292
293 //for each resultset
294 while (rs.next()){
295 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("References handled: " + (i-1) + " in round -" );}
296
297 success &= makeSingleReferenceRecord(rs, state, partitioner, refToSave, refCounter);
298 } // end resultSet
299
300 //for the concept reference a fixed uuid may be needed -> change uuid
301 Integer sourceSecId = (Integer)config.getSourceSecId();
302 Reference sec = refToSave.get(sourceSecId);
303
304 if (sec != null){
305 sec.setUuid(config.getSecUuid());
306 logger.info("SecUuid changed to: " + config.getSecUuid());
307 }
308
309 //save and store in map
310 logger.warn("Save references (" + refCounter.toString() + ")"); //set preliminary to warn for printing dedup count
311
312 getReferenceService().saveOrUpdate(refToSave.values());
313
314 // logger.info("end makeReferences ..." + getSuccessString(success));;
315 return success;
316 } catch (SQLException e) {
317 logger.error("SQLException:" + e);
318 return false;
319 }
320 }
321
322
323
324 /**
325 * Adds the inReference to the according references.
326 * @param partitioner
327 * @param state
328 * @return
329 */
330 private boolean doPartitionSecondPath(@SuppressWarnings("rawtypes") ResultSetPartitioner partitioner, BerlinModelImportState state) {
331 boolean success = true;
332
333 Map<Integer, Reference> refToSave = new HashMap<>();
334
335 @SuppressWarnings("unchecked")
336 Map<String, Reference> relatedReferencesMap = partitioner.getObjectMap(REFERENCE_NAMESPACE);
337
338 try {
339 int i = 0;
340 RefCounter refCounter = new RefCounter();
341
342 ResultSet rs = partitioner.getResultSet();
343 //for each resultset
344 while (rs.next()){
345 if ((i++ % modCount) == 0 && i!= 1 ){ logger.info("References handled: " + (i-1) + " in round -" );}
346
347 Integer refId = rs.getInt("refId");
348 Integer inRefFk = nullSafeInt(rs, "inRefFk");
349
350 if (inRefFk != null){
351
352 Reference thisRef = relatedReferencesMap.get(String.valueOf(refId));
353
354 Reference inRef = relatedReferencesMap.get(String.valueOf(inRefFk));
355
356 if (thisRef != null){
357 if (inRef == null){
358 logger.warn("No InRef found for nomRef: " + thisRef.getTitleCache() + "; RefId: " + refId + "; inRefFK: " + inRefFk);
359 }
360 thisRef.setInReference(inRef);
361 refToSave.put(refId, thisRef);
362 if(!thisRef.isProtectedTitleCache()){
363 thisRef.setTitleCache(null);
364 thisRef.getTitleCache();
365 }
366 }else{
367 logger.warn("Reference which has an inReference not found in DB. RefId: " + refId);
368 }
369 if(inRefFk.equals(0)){
370 logger.warn("InRefFk is 0 for refId "+ refId);
371 }
372 }
373
374 } // end resultSet
375
376 //save and store in map
377 logger.info("Save in references (" + refCounter.toString() + ")");
378 getReferenceService().saveOrUpdate(refToSave.values());
379
380 // }//end resultSetList
381
382 // logger.info("end makeReferences ..." + getSuccessString(success));;
383 return success;
384 } catch (SQLException e) {
385 logger.error("SQLException:" + e);
386 return false;
387 }
388 }
389
390
391 @Override
392 public Map<Object, Map<String, ? extends CdmBase>> getRelatedObjectsForPartition(ResultSet rs, BerlinModelImportState state) {
393
394 String nameSpace;
395 Set<String> idSet;
396
397 Map<Object, Map<String, ? extends CdmBase>> result = new HashMap<>();
398
399 try{
400 Set<String> teamIdSet = new HashSet<>();
401 Set<String> referenceIdSet = new HashSet<>();
402 Set<String> teamStringSet = new HashSet<>();
403
404 while (rs.next()){
405 handleForeignKey(rs, teamIdSet, "NomAuthorTeamFk");
406 handleForeignKey(rs, referenceIdSet, "InRefFk");
407 handleForeignKey(rs, teamStringSet, "refAuthorString");
408 //TODO only needed in second path but state not available here to check if state is second path
409 handleForeignKey(rs, referenceIdSet, "refId");
410 }
411
412 Set<String> teamStringSet2 = new HashSet<>();
413 for (String teamString : teamStringSet){
414 teamStringSet2.add(teamString.replace("'", "´"));
415 }
416
417 //team map
418 nameSpace = BerlinModelAuthorTeamImport.NAMESPACE;
419 idSet = teamIdSet;
420 @SuppressWarnings("rawtypes")
421 Map<String, TeamOrPersonBase> teamMap = getCommonService().getSourcedObjectsByIdInSourceC(TeamOrPersonBase.class, idSet, nameSpace);
422 result.put(nameSpace, teamMap);
423
424 //refAuthor map
425 nameSpace = REF_AUTHOR_NAMESPACE;
426 idSet = teamStringSet2;
427 @SuppressWarnings("unchecked")
428 Map<String, TeamOrPersonBase> refAuthorMap = getCommonService().getSourcedObjectsByIdInSourceC(TeamOrPersonBase.class, idSet, nameSpace);
429 result.put(nameSpace, refAuthorMap);
430
431 //reference map
432 nameSpace = BerlinModelReferenceImport.REFERENCE_NAMESPACE;
433 idSet = referenceIdSet;
434 Map<String, Reference> referenceMap = getCommonService().getSourcedObjectsByIdInSourceC(Reference.class, idSet, nameSpace);
435 result.put(nameSpace, referenceMap);
436
437 } catch (SQLException e) {
438 throw new RuntimeException(e);
439 }
440 return result;
441 }
442
443
444 /**
445 * Handles a single reference record
446 * @param rs
447 * @param state
448 * @param biblioRefToSave
449 * @param nomRefToSave
450 * @param relatedBiblioReferences
451 * @param relatedNomReferences
452 * @param refCounter
453 * @return
454 */
455 private boolean makeSingleReferenceRecord(
456 ResultSet rs,
457 BerlinModelImportState state,
458 ResultSetPartitioner<BerlinModelImportState> partitioner,
459 Map<Integer, Reference> refToSave,
460 RefCounter refCounter){
461
462 boolean success = true;
463
464 Integer refId = null;
465 try {
466 Map<String, Object> valueMap = getValueMap(rs);
467
468 Integer categoryFk = (Integer)valueMap.get("refCategoryFk".toLowerCase());
469 refId = (Integer)valueMap.get("refId".toLowerCase());
470 Boolean thesisFlag = (Boolean)valueMap.get("thesisFlag".toLowerCase());
471
472
473 Reference reference;
474 logger.debug("RefCategoryFk: " + categoryFk);
475
476 if (thesisFlag){
477 reference = makeThesis(valueMap);
478 }else if (categoryFk == REF_JOURNAL){
479 reference = makeJournal(valueMap);
480 }else if(categoryFk == REF_BOOK){
481 reference = makeBook(valueMap);
482 }else if(categoryFk == REF_DATABASE){
483 reference = makeDatabase(valueMap);
484 }else if(categoryFk == REF_INFORMAL){
485 reference = makeInformal(valueMap);
486 }else if(categoryFk == REF_WEBSITE){
487 reference = makeWebSite(valueMap);
488 }else if(categoryFk == REF_UNKNOWN){
489 reference = makeUnknown(valueMap);
490 }else if(categoryFk == REF_PRINT_SERIES){
491 reference = makePrintSeries(valueMap);
492 }else if(categoryFk == REF_CONFERENCE_PROCEEDINGS){
493 reference = makeProceedings(valueMap);
494 }else if(categoryFk == REF_ARTICLE){
495 reference = makeArticle(valueMap);
496 }else if(categoryFk == REF_JOURNAL_VOLUME){
497 reference = makeJournalVolume(valueMap);
498 }else if(categoryFk == REF_PART_OF_OTHER_TITLE){
499 reference = makePartOfOtherTitle(valueMap);
500 }else{
501 logger.warn("Unknown categoryFk (" + categoryFk + "). Create 'Generic instead'");
502 reference = ReferenceFactory.newGeneric();
503 success = false;
504 }
505
506 //refYear
507 String refYear = (String)valueMap.get("refYear".toLowerCase());
508 reference.setDatePublished(ImportHelper.getDatePublished(refYear));
509
510 handleEdition(reference);
511
512 //created, updated, notes
513 doCreatedUpdatedNotes(state, reference, rs);
514
515 //idInSource (import from older source to berlin model)
516 //TODO do we want this being imported? Maybe as alternatvie identifier?
517 String idInSource = (String)valueMap.get("IdInSource".toLowerCase());
518 if (isNotBlank(idInSource)){
519 if(!state.getConfig().isDoSourceNumber()){
520 IdentifiableSource source = IdentifiableSource.NewDataImportInstance(idInSource);
521 source.setIdNamespace("import to Berlin Model");
522 reference.addSource(source);
523 }else{
524 makeSourceNumbers(state, idInSource, reference, refId);
525 }
526 }
527 String uuid = null;
528 if (resultSetHasColumn(rs,"UUID")){
529 uuid = rs.getString("UUID");
530 if (uuid != null){
531 reference.setUuid(UUID.fromString(uuid));
532 }
533 }
534
535 //nom&BiblioReference - must be last because a clone is created
536 success &= makeNomAndBiblioReference(rs, state, partitioner, refId, reference, refCounter, refToSave);
537
538
539 } catch (Exception e) {
540 logger.warn("Reference with BM refId '" + CdmUtils.Nz(refId) + "' threw Exception and could not be saved");
541 e.printStackTrace();
542 success = false;
543 }
544 return success;
545 }
546
547
548 /**
549 * @param state
550 * @param idInSource
551 * @param reference
552 * @param refId
553 */
554 private void makeSourceNumbers(BerlinModelImportState state, String idInSource, Reference reference,
555 Integer refId) {
556 String[] splits = idInSource.split("\\|");
557 for (String split : splits){
558 split = split.trim();
559 UUID uuid = BerlinModelTransformer.uuidEMReferenceSourceNumber;
560 TermVocabulary<DefinedTerm> voc = null; //user defined voc
561 DefinedTerm type = getIdentiferType(state, uuid, "E+M Reference Source Number", "Euro+Med Reference Source Number", "E+M Source Number", voc);
562 Identifier.NewInstance(reference, split, type);
563 }
564 }
565
566 /**
567 * @param reference
568 */
569 private void handleEdition(Reference reference) {
570 if (reference.getEdition()!= null && reference.getEdition().startsWith("ed. ")){
571 reference.setEdition(reference.getEdition().substring(4));
572 }
573
574 }
575
576 /**
577 * Creates and saves a nom. reference and a biblio. reference after checking necessity
578 * @param rs
579 * @param refId
580 * @param ref
581 * @param refCounter
582 * @param biblioRefToSave
583 * @param nomRefToSave
584 * @param teamMap
585 * @param stores
586 * @return
587 * @throws SQLException
588 */
589 private boolean makeNomAndBiblioReference(
590 ResultSet rs,
591 BerlinModelImportState state,
592 @SuppressWarnings("rawtypes") ResultSetPartitioner partitioner,
593 int refId,
594 Reference ref,
595 RefCounter refCounter,
596 Map<Integer, Reference> refToSave
597 ) throws SQLException{
598
599 @SuppressWarnings("unchecked")
600 Map<String, Team> teamMap = partitioner.getObjectMap(BerlinModelAuthorTeamImport.NAMESPACE);
601
602 String refCache = trim(rs.getString("refCache"));
603 String nomRefCache = trim(rs.getString("nomRefCache"));
604 String title = trim(rs.getString("title"));
605 String nomTitleAbbrev = trim(rs.getString("nomTitleAbbrev"));
606 boolean isPreliminary = rs.getBoolean("PreliminaryFlag");
607 String refAuthorString = trim(rs.getString("refAuthorString"));
608 Integer nomAuthorTeamFk = nullSafeInt(rs, "NomAuthorTeamFk");
609 Integer inRefFk = nullSafeInt(rs, "inRefFk");
610
611
612 TeamOrPersonBase<?> nomAuthor = null;
613 if (nomAuthorTeamFk != null){
614 String strNomAuthorTeamFk = String.valueOf(nomAuthorTeamFk);
615 nomAuthor = teamMap.get(strNomAuthorTeamFk);
616 if (nomAuthor == null){
617 logger.warn("NomAuthor ("+strNomAuthorTeamFk+") not found in teamMap (but it should exist) for " + refId);
618 }
619 }
620
621 Reference sourceReference = state.getTransactionalSourceReference();
622
623 //preliminary
624 if (isPreliminary){
625 ref.setAbbrevTitleCache(nomRefCache, true);
626 ref.setTitleCache(refCache, true);
627 }
628
629 //title/abbrevTitle
630 if (isNotBlank(nomTitleAbbrev)){
631 ref.setAbbrevTitle(nomTitleAbbrev);
632 }
633 if (isNotBlank(title)){
634 ref.setTitle(title);
635 }
636
637 //author
638 TeamOrPersonBase<?> author = getAuthorship(state, refAuthorString, nomAuthor, refId);
639 ref.setAuthorship(author);
640
641 if (ref.getType().equals(ReferenceType.Book)){
642 extraktBookVolume(ref);
643 }
644
645 //inRef
646 Reference inRef = null;
647 if (inRefFk != null){
648 @SuppressWarnings({"unchecked" })
649 Map<String, Reference> relatedReferences = partitioner.getObjectMap(REFERENCE_NAMESPACE);
650 inRef = relatedReferences.get(String.valueOf(inRefFk));
651 if (inRef == null){
652 inRef = refToSave.get(inRefFk);
653 }
654 if (inRef == null){
655 logger.warn("InRef not (yet) found. RefId: " + refId + "; InRef: "+ inRefFk);
656 }else{
657 ref.setInReference(inRef);
658 }
659 }
660
661 Reference result = deduplicateReference(state, ref);
662 if(ref != result){
663 //dedup not possible at this point because inRef exists but is not yet defined
664 if (inRefFk != null && inRef == null){
665 result = ref;
666 logger.warn("Ref has deduplication candidate but inRef is still missing. " + inRef);
667 }else{
668 logger.debug("Reference was deduplicated. RefId: " + refId);
669 //FIXME also check annotations etc. for deduplication
670 refCounter.dedupCount++;
671 }
672 }else{
673 refCounter.refCount++;
674 }
675
676 //save
677 if (! refToSave.containsKey(refId)){
678 refToSave.put(refId, result);
679 }else{
680 //should not happen
681 logger.warn("Duplicate refId in Berlin Model database. Second reference was not imported !!");
682 }
683
684
685 //refId
686 ImportHelper.setOriginalSource(result, sourceReference, refId, REFERENCE_NAMESPACE);
687
688 if (commonNameRefSet != null && commonNameRefSet.contains(refId)){
689 result.addMarker(Marker.NewInstance(MarkerType.COMMON_NAME_REFERENCE(), true));
690 }
691
692 return true;
693 }
694
695 /**
696 * @param string
697 * @return
698 */
699 private String trim(String string) {
700 if (string == null){
701 return null;
702 }else{
703 return string.trim();
704 }
705 }
706
707 /**
708 * Copies the created and updated information from the nomReference to the cloned bibliographic reference
709 * @param referenceBase
710 * @param nomReference
711 */
712 private void copyCreatedUpdated(Reference biblioReference, Reference nomReference) {
713 biblioReference.setCreatedBy(nomReference.getCreatedBy());
714 biblioReference.setCreated(nomReference.getCreated());
715 biblioReference.setUpdatedBy(nomReference.getUpdatedBy());
716 biblioReference.setUpdated(nomReference.getUpdated());
717
718 }
719
720 private Reference makeArticle (Map<String, Object> valueMap){
721
722 IArticle article = ReferenceFactory.newArticle();
723 Object inRefFk = valueMap.get("inRefFk".toLowerCase());
724 Integer inRefCategoryFk = (Integer)valueMap.get("inRefCategoryFk".toLowerCase());
725 Integer refId = (Integer)valueMap.get("refId".toLowerCase());
726
727 if (inRefFk != null){
728 if (inRefCategoryFk != REF_JOURNAL){
729 logger.warn("Wrong inrefCategory for Article (refID = " + refId +"). Type must be 'Journal' but was not (RefCategoryFk=" + inRefCategoryFk + "))." +
730 " InReference was added anyway! ");
731 }
732 }else{
733 logger.warn ("Article has no inreference: " + refId);
734 }
735 makeStandardMapper(valueMap, (Reference)article); //url, pages, series, volume
736 String url = (String)valueMap.get("url");
737 if (url != null && url.contains("dx.doi.org")){
738 article.setDoi(DOI.fromString(url));
739 article.setUri(null);
740 }
741 return (Reference)article;
742 }
743
744 private Reference makePartOfOtherTitle (Map<String, Object> valueMap){
745
746 Reference result;
747 Object inRefFk = valueMap.get("inRefFk".toLowerCase());
748 Integer inRefCategoryFk = (Integer)valueMap.get("inRefCategoryFk".toLowerCase());
749 Integer refId = (Integer)valueMap.get("refId".toLowerCase());
750
751 if (inRefCategoryFk == null){
752 //null -> error
753 logger.warn("Part-Of-Other-Title has no inRefCategoryFk! RefId = " + refId + ". ReferenceType set to Generic.");
754 result = makeUnknown(valueMap);
755 }else if (inRefFk == null){
756 //TODO is this correct ??
757 logger.warn("Part-Of-Other-Title has no in reference: " + refId);
758 result = makeUnknown(valueMap);
759 }else if (inRefCategoryFk == REF_BOOK){
760 //BookSection
761 IBookSection bookSection = ReferenceFactory.newBookSection();
762 result = (Reference)bookSection;
763 }else if (inRefCategoryFk == REF_ARTICLE){
764 //Article
765 logger.info("Reference (refId = " + refId + ") of type 'part_of_other_title' is part of 'article'." +
766 " We use the section reference type for such in references now.") ;
767 result = ReferenceFactory.newSection();
768 }else if (inRefCategoryFk == REF_JOURNAL){
769 //TODO
770 logger.warn("Reference (refId = " + refId + ") of type 'part_of_other_title' has inReference of type 'journal'." +
771 " This is not allowed! Generic reference created instead") ;
772 result = ReferenceFactory.newGeneric();
773 result.addMarker(Marker.NewInstance(MarkerType.TO_BE_CHECKED(), true));
774 }else if (inRefCategoryFk == REF_PART_OF_OTHER_TITLE){
775 logger.info("Reference (refId = " + refId + ") of type 'part_of_other_title' has inReference 'part of other title'." +
776 " This is allowed, but may be true only for specific cases (e.g. parts of book chapters). You may want to check if this is correct") ;
777 result = ReferenceFactory.newSection();
778 }else{
779 logger.warn("InReference type (catFk = " + inRefCategoryFk + ") of part-of-reference not recognized for refId " + refId + "." +
780 " Create 'Generic' reference instead");
781 result = ReferenceFactory.newGeneric();
782 }
783 makeStandardMapper(valueMap, result); //url, pages
784 return result;
785 }
786
787
788 /**
789 * @param inRefFkInt
790 * @param biblioRefToSave
791 * @param nomRefToSave
792 * @param relatedBiblioReferences
793 * @param relatedNomReferences
794 * @return
795 */
796 private boolean existsInMapOrToSave(Integer inRefFkInt, Map<Integer, Reference> refToSave, Map<String, Reference> relatedReferences) {
797 boolean result = false;
798 if (inRefFkInt == null){
799 return false;
800 }
801 result |= refToSave.containsKey(inRefFkInt);
802 result |= relatedReferences.containsKey(String.valueOf(inRefFkInt));
803 return result;
804 }
805
806 private Reference makeWebSite(Map<String, Object> valueMap){
807 if (logger.isDebugEnabled()){logger.debug("RefType 'Website'");}
808 Reference webPage = ReferenceFactory.newWebPage();
809 makeStandardMapper(valueMap, webPage); //placePublished, publisher
810 return webPage;
811 }
812
813 private Reference makeUnknown(Map<String, Object> valueMap){
814 if (logger.isDebugEnabled()){logger.debug("RefType 'Unknown'");}
815 Reference generic = ReferenceFactory.newGeneric();
816 // generic.setSeries(series);
817 makeStandardMapper(valueMap, generic); //pages, placePublished, publisher, series, volume
818 return generic;
819 }
820
821 private Reference makeInformal(Map<String, Object> valueMap){
822 if (logger.isDebugEnabled()){logger.debug("RefType 'Informal'");}
823 Reference generic = ReferenceFactory.newGeneric();
824 // informal.setSeries(series);
825 makeStandardMapper(valueMap, generic);//editor, pages, placePublished, publisher, series, volume
826 String informal = (String)valueMap.get("InformalRefCategory".toLowerCase());
827 if (isNotBlank(informal) ){
828 generic.addExtension(informal, ExtensionType.INFORMAL_CATEGORY());
829 }
830 return generic;
831 }
832
833 private Reference makeDatabase(Map<String, Object> valueMap){
834 if (logger.isDebugEnabled()){logger.debug("RefType 'Database'");}
835 Reference database = ReferenceFactory.newDatabase();
836 makeStandardMapper(valueMap, database); //?
837 return database;
838 }
839
840 private Reference makeJournal(Map<String, Object> valueMap){
841 if (logger.isDebugEnabled()){logger.debug("RefType 'Journal'");}
842 Reference journal = ReferenceFactory.newJournal();
843
844 Set<String> omitAttributes = new HashSet<>();
845 String series = "series";
846 // omitAttributes.add(series);
847
848 makeStandardMapper(valueMap, journal, omitAttributes); //issn,placePublished,publisher
849 // if (valueMap.get(series) != null){
850 // logger.warn("Series not yet implemented for journal!");
851 // }
852 return journal;
853 }
854
855 private Reference makeBook(
856 Map<String, Object> valueMap){
857
858 if (logger.isDebugEnabled()){logger.debug("RefType 'Book'");}
859 Reference book = ReferenceFactory.newBook();
860 // Integer refId = (Integer)valueMap.get("refId".toLowerCase());
861
862 //Set bookAttributes = new String[]{"edition", "isbn", "pages","publicationTown","publisher","volume"};
863
864 Set<String> omitAttributes = new HashSet<>();
865 String attrSeries = "series";
866 // omitAttributes.add(attrSeries);
867
868 makeStandardMapper(valueMap, book, omitAttributes);
869
870 //Series (as String)
871 IPrintSeries printSeries = null;
872 if (valueMap.get(attrSeries) != null){
873 String series = (String)valueMap.get("title".toLowerCase());
874 if (series == null){
875 String nomTitle = (String)valueMap.get("nomTitleAbbrev".toLowerCase());
876 series = nomTitle;
877 }
878 printSeries = ReferenceFactory.newPrintSeries(series);
879 logger.info("Implementation of printSeries is preliminary");
880 }
881 //Series (as Reference)
882 if (book.getInSeries() != null && printSeries != null){
883 logger.warn("Book has series string and inSeries reference. Can not take both. Series string neglected");
884 }else{
885 book.setInSeries(printSeries);
886 }
887 book.setEditor(null);
888
889 return book;
890
891 }
892
893
894 int parsedBookVolumes = 0;
895 private void extraktBookVolume(Reference book) {
896 if (isExtractBookVolumeCandidate(book)){
897 String patternStr = "(.{2,})\\s(\\d{1,2})";
898 int groupIndex = 2;
899 Pattern pattern = Pattern.compile(patternStr);
900
901 String abbrevCache = book.getAbbrevTitleCache();
902 String titleCache = book.getTitleCache();
903 String vol = null;
904 String volFull = null;
905 String abbrev = book.getAbbrevTitle();
906 if (isNotBlank(abbrev)){
907 Matcher matcher = pattern.matcher(abbrev);
908 if (matcher.matches()){
909 vol = matcher.group(groupIndex);
910 abbrev = matcher.group(1);
911 }
912 }
913
914 String full = book.getTitle();
915 if (isNotBlank(full)){
916 Matcher matcher = pattern.matcher(full);
917 if (matcher.matches()){
918 volFull = matcher.group(groupIndex);
919 full = matcher.group(1);
920 }
921 }
922 if (vol != null && volFull != null){
923 if (!vol.equals(volFull)){
924 return;
925 }
926 }else if (vol == null && volFull == null){
927 return;
928 }else if (vol == null){
929 if (isNotBlank(abbrev)){
930 return;
931 }else{
932 vol = volFull;
933 }
934 }else if (volFull == null){
935 if (isNotBlank(full)){
936 return;
937 }
938 }else{
939 logger.warn("Should not happen");
940 }
941 book.setVolume(vol);
942 book.setAbbrevTitle(abbrev);
943 book.setTitle(full);
944 if (!book.getAbbrevTitleCache().equals(abbrevCache)){
945 logger.warn("Abbrev title cache for parsed book volume does not match: " + book.getAbbrevTitleCache() + " <-> "+abbrevCache);
946 }else if (!book.getTitleCache().equals(titleCache)){
947 logger.warn("Title cache for parsed book volume does not match: " + book.getTitleCache() + " <-> "+titleCache);
948 }else{
949 // System.out.println(titleCache);
950 // System.out.println(abbrevCache);
951 parsedBookVolumes++;
952 }
953 }else{
954 return;
955 }
956 }
957
958 /**
959 * @param book
960 * @return
961 */
962 private boolean isExtractBookVolumeCandidate(Reference book) {
963 if (isNotBlank(book.getVolume()) || isNotBlank(book.getEdition()) || isNotBlank(book.getSeriesPart())){
964 return false;
965 }
966 if (!checkExtractBookVolumeTitle(book.getAbbrevTitle())){
967 return false;
968 }
969 if (!checkExtractBookVolumeTitle(book.getTitle())){
970 return false;
971 }
972 return true;
973 }
974
975 /**
976 * @param abbrevTitle
977 * @return
978 */
979 private boolean checkExtractBookVolumeTitle(String title) {
980 if (title == null){
981 return true;
982 }
983 if (title.contains(",") || title.contains("ed.") || title.contains("Ed.")|| title.contains("Suppl")
984 || title.contains("Ser.")|| title.contains("ser.")) {
985 return false;
986 }
987 return true;
988 }
989
990 /**
991 * Returns the requested object if it exists in one of both maps. Prefers the refToSaveMap in ambigious cases.
992 * @param inRefFkInt
993 * @param nomRefToSave
994 * @param relatedNomReferences
995 * @return
996 */
997 private Reference getReferenceFromMaps(
998 int inRefFkInt,
999 Map<Integer, Reference> refToSaveMap,
1000 Map<String, Reference> relatedRefMap) {
1001 Reference result = null;
1002 result = refToSaveMap.get(inRefFkInt);
1003 if (result == null){
1004 result = relatedRefMap.get(String.valueOf(inRefFkInt));
1005 }
1006 return result;
1007 }
1008
1009 private Reference makePrintSeries(Map<String, Object> valueMap){
1010 if (logger.isDebugEnabled()){logger.debug("RefType 'PrintSeries'");}
1011 Reference printSeries = ReferenceFactory.newPrintSeries();
1012 makeStandardMapper(valueMap, printSeries, null);
1013 return printSeries;
1014 }
1015
1016 private Reference makeProceedings(Map<String, Object> valueMap){
1017 if (logger.isDebugEnabled()){logger.debug("RefType 'Proceedings'");}
1018 Reference proceedings = ReferenceFactory.newProceedings();
1019 makeStandardMapper(valueMap, proceedings, null);
1020 return proceedings;
1021 }
1022
1023 private Reference makeThesis(Map<String, Object> valueMap){
1024 if (logger.isDebugEnabled()){logger.debug("RefType 'Thesis'");}
1025 Reference thesis = ReferenceFactory.newThesis();
1026 makeStandardMapper(valueMap, thesis, null);
1027 return thesis;
1028 }
1029
1030
1031 private Reference makeJournalVolume(Map<String, Object> valueMap){
1032 if (logger.isDebugEnabled()){logger.debug("RefType 'JournalVolume'");}
1033 //Proceedings proceedings = Proceedings.NewInstance();
1034 Reference journalVolume = ReferenceFactory.newGeneric();
1035 makeStandardMapper(valueMap, journalVolume, null);
1036 logger.warn("Journal volumes not yet implemented. Generic created instead but with errors");
1037 return journalVolume;
1038 }
1039
1040 private boolean makeStandardMapper(Map<String, Object> valueMap, Reference ref){
1041 return makeStandardMapper(valueMap, ref, null);
1042 }
1043
1044
1045 private boolean makeStandardMapper(Map<String, Object> valueMap, CdmBase cdmBase, Set<String> omitAttributes){
1046 boolean result = true;
1047 for (CdmAttributeMapperBase mapper : classMappers){
1048 if (mapper instanceof CdmSingleAttributeMapperBase){
1049 result &= makeStandardSingleMapper(valueMap, cdmBase, (CdmSingleAttributeMapperBase)mapper, omitAttributes);
1050 }else if (mapper instanceof CdmOneToManyMapper){
1051 result &= makeMultipleValueAddMapper(valueMap, cdmBase, (CdmOneToManyMapper)mapper, omitAttributes);
1052 }else{
1053 logger.error("Unknown mapper type");
1054 result = false;
1055 }
1056 }
1057 return result;
1058 }
1059
1060 private boolean makeStandardSingleMapper(Map<String, Object> valueMap, CdmBase cdmBase, CdmSingleAttributeMapperBase mapper, Set<String> omitAttributes){
1061 boolean result = true;
1062 if (omitAttributes == null){
1063 omitAttributes = new HashSet<>();
1064 }
1065 if (mapper instanceof DbImportExtensionMapper){
1066 result &= ((DbImportExtensionMapper)mapper).invoke(valueMap, cdmBase);
1067 }else if (mapper instanceof DbImportMarkerMapper){
1068 result &= ((DbImportMarkerMapper)mapper).invoke(valueMap, cdmBase);
1069 }else{
1070 String sourceAttribute = mapper.getSourceAttributeList().get(0).toLowerCase();
1071 Object value = valueMap.get(sourceAttribute);
1072 if (mapper instanceof CdmUriMapper && value != null){
1073 try {
1074 value = new URI (value.toString());
1075 } catch (URISyntaxException e) {
1076 logger.error("URI syntax exception: " + value.toString());
1077 value = null;
1078 }
1079 }
1080 if (value != null){
1081 String destinationAttribute = mapper.getDestinationAttribute();
1082 if (! omitAttributes.contains(destinationAttribute)){
1083 result &= ImportHelper.addValue(value, cdmBase, destinationAttribute, mapper.getTypeClass(), OVERWRITE, OBLIGATORY);
1084 }
1085 }
1086 }
1087 return result;
1088 }
1089
1090
1091 private boolean makeMultipleValueAddMapper(Map<String, Object> valueMap, CdmBase cdmBase, CdmOneToManyMapper<CdmBase, CdmBase, CdmSingleAttributeMapperBase> mapper, Set<String> omitAttributes){
1092 if (omitAttributes == null){
1093 omitAttributes = new HashSet<>();
1094 }
1095 boolean result = true;
1096 String destinationAttribute = mapper.getSingleAttributeName();
1097 List<Object> sourceValues = new ArrayList<>();
1098 List<Class> classes = new ArrayList<>();
1099 for (CdmSingleAttributeMapperBase singleMapper : mapper.getSingleMappers()){
1100 String sourceAttribute = singleMapper.getSourceAttribute();
1101 Object value = valueMap.get(sourceAttribute);
1102 sourceValues.add(value);
1103 Class<?> clazz = singleMapper.getTypeClass();
1104 classes.add(clazz);
1105 }
1106
1107 result &= ImportHelper.addMultipleValues(sourceValues, cdmBase, destinationAttribute, classes, NO_OVERWRITE, OBLIGATORY);
1108 return result;
1109 }
1110
1111
1112 private TeamOrPersonBase<?> getAuthorship(BerlinModelImportState state, String refAuthorString,
1113 TeamOrPersonBase<?> nomAuthor, Integer refId){
1114
1115 TeamOrPersonBase<?> result;
1116 if (nomAuthor != null){
1117 result = nomAuthor;
1118 if (isNotBlank(refAuthorString) && !nomAuthor.getTitleCache().equals(refAuthorString)){
1119 boolean isSimilar = handleSimilarAuthors(state, refAuthorString, nomAuthor, refId);
1120 if (! isSimilar){
1121 String message = "refAuthorString differs from nomAuthor.titleCache: " + refAuthorString
1122 + " <-> " + nomAuthor.getTitleCache() + "; RefId: " + refId;
1123 logger.warn(message);
1124 }
1125 }
1126 } else if (isNotBlank(refAuthorString)){//only RefAuthorString exists
1127 refAuthorString = refAuthorString.trim();
1128 //TODO match with existing Persons/Teams
1129 TeamOrPersonBase<?> author = state.getRelatedObject(REF_AUTHOR_NAMESPACE, refAuthorString, TeamOrPersonBase.class);
1130 if (author == null){
1131 if (!BerlinModelAuthorTeamImport.hasTeamSeparator(refAuthorString)){
1132 author = makePerson(refAuthorString, false, refId);
1133 }else{
1134 author = makeTeam(state, refAuthorString, refId);
1135 }
1136 state.addRelatedObject(REF_AUTHOR_NAMESPACE, refAuthorString, author);
1137 result = deduplicatePersonOrTeam(state, author);
1138
1139 if (result != author){
1140 logger.debug("RefAuthorString author deduplicated " + author);
1141 }else{
1142 if (!importSourceExists(author, refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference() )){
1143 author.addImportSource(refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null);
1144 }
1145 }
1146 }else{
1147 logger.debug("RefAuthor loaded from map");
1148 }
1149 result = author;
1150 }else{
1151 result = null;
1152 }
1153
1154 return result;
1155 }
1156
1157
1158 /**
1159 * @param state
1160 * @param refAuthorString
1161 * @param refId
1162 * @return
1163 */
1164 private TeamOrPersonBase<?> makeTeam(BerlinModelImportState state, String refAuthorString, Integer refId) {
1165 Team team = Team.NewInstance();
1166 boolean hasDedupMember = false;
1167 if (containsEdOrColon(refAuthorString)){
1168 team.setTitleCache(refAuthorString, true);
1169 }else{
1170 String[] refAuthorTeams = BerlinModelAuthorTeamImport.splitTeam(refAuthorString);
1171 boolean lastWasInitials = false;
1172 for (int i = 0; i< refAuthorTeams.length ;i++){
1173 if (lastWasInitials){
1174 lastWasInitials = false;
1175 continue;
1176 }
1177 String fullTeam = refAuthorTeams[i].trim();
1178 String initials = null;
1179 if (refAuthorTeams.length > i+1){
1180 String nextSplit = refAuthorTeams[i+1].trim();
1181 if (isInitial(nextSplit)){
1182 lastWasInitials = true;
1183 initials = nextSplit;
1184 }
1185 }
1186 Person member = makePerson(fullTeam, isNotBlank(initials), refId);
1187
1188 if (initials != null){
1189 if (member.getInitials() != null){
1190 logger.warn("Initials already set: " + refId);
1191 }else if (!member.isProtectedTitleCache()){
1192 member.setInitials(initials);
1193 }else {
1194 member.setTitleCache(member.getTitleCache() + ", " + initials, true);
1195 }
1196 }
1197
1198 if (i == refAuthorTeams.length -1 && BerlinModelAuthorTeamImport.isEtAl(member)){
1199 team.setHasMoreMembers(true);
1200 }else{
1201 Person dedupMember = deduplicatePersonOrTeam(state, member);
1202 if (dedupMember != member){
1203 hasDedupMember = true;
1204 }else{
1205 if (!importSourceExists(member, refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference())){
1206 member.addImportSource(refAuthorString, REF_AUTHOR_NAMESPACE, state.getTransactionalSourceReference(), null);
1207 }
1208 }
1209
1210 team.addTeamMember(dedupMember);
1211 }
1212 }
1213 }
1214
1215 TeamOrPersonBase<?> result = team;
1216 if (team.getTeamMembers().size() == 1 && !team.isHasMoreMembers()){
1217 Person person = team.getTeamMembers().get(0);
1218 checkPerson(person, refAuthorString, hasDedupMember, refId);
1219 result = person;
1220 }else{
1221 checkTeam(team, refAuthorString, refId);
1222 result = team;
1223 }
1224
1225 return result;
1226 }
1227
1228 private static void checkTeam(Team team, String refAuthorString, Integer refId) {
1229 TeamDefaultCacheStrategy formatter = (TeamDefaultCacheStrategy) team.getCacheStrategy();
1230
1231 if (formatter.getTitleCache(team).equals(refAuthorString)){
1232 team.setProtectedTitleCache(false);
1233 }else if(formatter.getTitleCache(team).replace(" & ", ", ").equals(refAuthorString.replace(" & ", ", ").replace(" ,", ","))){
1234 //also accept teams with ', ' as final member separator as not protected
1235 team.setProtectedTitleCache(false);
1236 }else if(formatter.getFullTitle(team).replace(" & ", ", ").equals(refAuthorString.replace(" & ", ", "))){
1237 //.. or teams with initials first
1238 team.setProtectedTitleCache(false);
1239 }else if (containsEdOrColon(refAuthorString)){
1240 //nothing to do, it is expected to be protected
1241
1242 }else{
1243 team.setTitleCache(refAuthorString, true);
1244 logger.warn("Creation of titleCache for team with members did not (fully) work: " + refAuthorString + " <-> " + formatter.getTitleCache(team)+ " : " + refId);
1245 }
1246 }
1247
1248 private static void checkPerson(Person person, String refAuthorString, boolean hasDedupMember, Integer refId) {
1249 PersonDefaultCacheStrategy formatter = (PersonDefaultCacheStrategy) person.getCacheStrategy();
1250
1251 String oldTitleCache = person.getTitleCache();
1252 boolean oldTitleCacheProtected = person.isProtectedTitleCache();
1253
1254 if (! oldTitleCache.equals(refAuthorString)){
1255 logger.error("Old titleCache does not equal refAuthorString this should not happen. "+ oldTitleCache + " <-> " + refAuthorString + "; refId = " + refId);
1256 }
1257
1258 boolean protect = true;
1259 person.setProtectedTitleCache(false);
1260 if (refAuthorString.equals(formatter.getTitleCache(person))){
1261 protect = false;
1262 }else if(formatter.getFullTitle(person).equals(refAuthorString)){
1263 //.. or teams with initials first
1264 protect = false;
1265 }else{
1266 //keep protected, see below
1267 }
1268
1269 if (hasDedupMember){
1270 //restore
1271 //TODO maybe even do not use dedup for testing
1272 person.setTitleCache(oldTitleCache, oldTitleCacheProtected);
1273 if (protect != oldTitleCacheProtected){
1274 logger.warn("Deduplicated person protection requirement unclear for "+refAuthorString+". New:"+protect+"/Old:"+oldTitleCacheProtected+"; RefId: " + refId);
1275 }
1276 }else{
1277 if (protect){
1278 logger.warn("Creation of titleCache for person (converted from team) with members did not (fully) work: " + refAuthorString + " <-> " + formatter.getTitleCache(person)+ " : " + refId);
1279 person.setTitleCache(refAuthorString, protect);
1280 }else{
1281 //keep unprotected
1282 }
1283 }
1284 }
1285
1286 private static boolean containsEdOrColon(String str) {
1287 if (str.contains(" ed.") || str.contains(" Ed.") || str.contains("(ed.")
1288 || str.contains("[ed.") || str.contains("(Eds)") || str.contains("(Eds.)") ||
1289 str.contains("(eds.)") || str.contains(":")|| str.contains(";") || str.contains("Publ. & Inform. Directorate")
1290 || str.contains("Anonymous [Department of Botany, Faculty of Science, FER-ZPR, University of Zagreb]")
1291 || str.contains("Davis, P. H. (Güner, A. & al.)")){
1292 return true;
1293 }else{
1294 return false;
1295 }
1296 }
1297
1298 /**
1299 * @param nextSplit
1300 * @return
1301 */
1302 private static boolean isInitial(String str) {
1303 if (str == null){
1304 return false;
1305 }
1306 boolean matches = str.trim().matches("(\\p{javaUpperCase}|Yu|Ya|Th|Ch|Lj|Sz|Dz|Sh|Ju|R. M. da S)\\.?"
1307 + "(\\s*[-\\s]\\s*(\\p{javaUpperCase}|Yu|Ja|Kh|Tz|Ya|Th|Ju)\\.?)*(\\s+(van|von|de|de la|del|da|van der))?");
1308 return matches;
1309 }
1310
1311 private <T extends TeamOrPersonBase<?>> T deduplicatePersonOrTeam(BerlinModelImportState state,T author) {
1312 T result = deduplicationHelper.getExistingAuthor(state, author);
1313 return result;
1314 }
1315
1316 private Reference deduplicateReference(BerlinModelImportState state,Reference ref) {
1317 Reference result = deduplicationHelper.getExistingReference(state, ref);
1318 return result;
1319 }
1320
1321 private static Person makePerson(String full, boolean followedByInitial, Integer refId) {
1322 Person person = Person.NewInstance();
1323 person.setTitleCache(full, true);
1324 if (!full.matches(".*[\\s\\.].*")){
1325 person.setFamilyName(full);
1326 person.setProtectedTitleCache(false);
1327 }else{
1328 parsePerson(person, full, true, followedByInitial);
1329 }
1330
1331 if ((full.length() <= 2 && !full.matches("(Li|Bo|Em|Ay|Ma)")) || (full.length() == 3 && full.endsWith(".") && !full.equals("al.")) ){
1332 logger.warn("Unexpected short nom author name part: " + full + "; " + refId);
1333 }
1334
1335 return person;
1336 }
1337
1338 private static void parsePerson(Person person, String str, boolean preliminary, boolean followedByInitial) {
1339 String capWord = "\\p{javaUpperCase}\\p{javaLowerCase}{2,}";
1340 String famStart = "(Le |D'|'t |Mc|Mac|Des |d'|Du |De |Al-)";
1341 String regEx = "((\\p{javaUpperCase}|Ya|Th|Ju|Kh|An)\\.([\\s-]\\p{javaUpperCase}\\.)*(\\s(de|del|da|von|van|van der|v.|af|zu|von M. Und L.))?\\s)("
1342 + famStart + "?" + capWord + "((-| y | i | é | de | de la )" + capWord + ")?)";
1343 Matcher matcher = Pattern.compile(regEx).matcher(str);
1344 if (matcher.matches()){
1345 person.setProtectedTitleCache(false);
1346 String familyName = matcher.group(6).trim();
1347 person.setFamilyName(familyName);
1348 person.setInitials(matcher.group(1).trim());
1349 }else{
1350 String regEx2 = "("+ capWord + "\\s" + capWord + "|Le Sueur|Beck von Mannagetta|Di Martino|Galán de Mera|Van Der Maesen|Farga i Arquimbau|Perez de Paz|Borzatti de Loewenstern|Lo Giudice|Perez de Paz)";
1351 Matcher matcher2 = Pattern.compile(regEx2).matcher(str);
1352 if (followedByInitial && matcher2.matches()){
1353 person.setFamilyName(str);
1354 person.setProtectedTitleCache(false);
1355 }else{
1356 person.setTitleCache(str, preliminary);
1357 }
1358 }
1359 }
1360
1361 private static boolean handleSimilarAuthors(BerlinModelImportState state, String refAuthorString,
1362 TeamOrPersonBase<?> nomAuthor, int refId) {
1363 String nomTitle = nomAuthor.getTitleCache();
1364
1365 if (refAuthorString.equals(nomAuthor.getNomenclaturalTitleCache())){
1366 //nomTitle equal
1367 return true;
1368 }else{
1369 if (refAuthorString.replace(" & ", ", ").equals(nomTitle.replace(" & ", ", "))){
1370 //nomTitle equal except for "&"
1371 return true;
1372 }
1373 String nomFullTitle = nomAuthor.getFullTitle();
1374 if (refAuthorString.replace(" & ", ", ").equals(nomFullTitle.replace(" & ", ", "))){
1375 return true;
1376 }
1377
1378 if (nomAuthor.isInstanceOf(Person.class)){
1379 Person person = CdmBase.deproxy(nomAuthor, Person.class);
1380
1381 //refAuthor has initials behind, nom Author in front // the other way round is handled in firstIsFullNameOfInitialName
1382 if (refAuthorString.contains(",") && !nomTitle.contains(",") ){
1383 String[] splits = refAuthorString.split(",");
1384 if (splits.length == 2){
1385 String newMatch = splits[1].trim() + " " + splits[0].trim();
1386 if (newMatch.equals(nomTitle)){
1387 if (isBlank(person.getFamilyName())){
1388 person.setFamilyName(splits[0].trim());
1389 }
1390 if (isBlank(person.getInitials())){
1391 person.setInitials(splits[1].trim());
1392 }
1393 return true;
1394 }
1395 }
1396 }
1397
1398 if (refAuthorIsFamilyAuthorOfNomAuthor(state, refAuthorString, person)){
1399 return true;
1400 }
1401
1402 if (firstIsFullNameOfInitialName(state, refAuthorString, person, refId)){
1403 return true;
1404 }
1405 }
1406
1407 }
1408 return false;
1409 }
1410
1411 /**
1412 * @param state
1413 * @param refAuthorString
1414 * @param person
1415 * @return
1416 */
1417 private static boolean refAuthorIsFamilyAuthorOfNomAuthor(BerlinModelImportState state, String refAuthorString,
1418 Person person) {
1419 if (refAuthorString.equals(person.getFamilyName())){
1420 return true;
1421 }else{
1422 return false;
1423 }
1424 }
1425
1426 /**
1427 * @param state
1428 * @param refAuthorString
1429 * @param nomAuthor
1430 * @return
1431 */
1432 private static boolean firstIsFullNameOfInitialName(BerlinModelImportState state, String fullName,
1433 Person initialAuthor, int refId) {
1434 String initialName = initialAuthor.getTitleCache();
1435
1436 String[] fullSplits = fullName.split(",");
1437 String[] initialSplits = initialName.split(",");
1438
1439 if (fullSplits.length == 2 && initialSplits.length == 2){
1440 String[] fullGivenName = fullSplits[1].trim().split(" ");
1441 String[] initialsGivenName = initialSplits[1].trim().split(" ");
1442 boolean result = compareFamilyAndInitials(fullSplits[0], initialSplits[0], fullGivenName, initialsGivenName);
1443 if (result){
1444 setGivenName(state, fullSplits[1], initialAuthor, refId);
1445 }
1446 return result;
1447 }else if (fullSplits.length == 1 && initialSplits.length == 2){
1448 String[] fullSingleSplits = fullName.split(" ");
1449 String fullFamily = fullSingleSplits[fullSingleSplits.length-1];
1450 String[] fullGivenName = Arrays.copyOfRange(fullSingleSplits, 0, fullSingleSplits.length-1);
1451 String[] initialsGivenName = initialSplits[1].trim().split(" ");
1452 boolean result = compareFamilyAndInitials(fullFamily, initialSplits[0], fullGivenName, initialsGivenName);
1453 if (result){
1454 if(hasAtLeastOneFullName(fullGivenName)){
1455 setGivenName(state, CdmUtils.concat(" ", fullGivenName), initialAuthor, refId);
1456 }
1457 }
1458 return result;
1459 }else if (fullSplits.length == 1 && initialAuthor.getInitials() == null){
1460 //don't if this will be implemented, initialAuthors with only nomencl.Author set
1461 }
1462
1463 return false;
1464 }
1465
1466 /**
1467 * @param fullGivenName
1468 * @return
1469 */
1470 private static boolean hasAtLeastOneFullName(String[] fullGivenName) {
1471 for (String singleName : fullGivenName){
1472 if (!singleName.endsWith(".") && singleName.length() > 2 && !singleName.matches("(von|van)") ){
1473 return true;
1474 }
1475 }
1476 return false;
1477 }
1478
1479 private static void setGivenName(BerlinModelImportState state, String givenName, Person person, int refId) {
1480 givenName = givenName.trim();
1481 if(person.getGivenName() == null || person.getGivenName().equals(givenName)){
1482 person.setGivenName(givenName);
1483 }else{
1484 logger.warn("RefAuthor given name and existing given name differ: " + givenName + " <-> " + person.getGivenName() + "; RefId + " + refId);
1485 }
1486 }
1487
1488 protected static boolean compareFamilyAndInitials(String fullFamilyName, String initialsFamilyName,
1489 String[] fullGivenName, String[] initialsGivenName) {
1490 if (!fullFamilyName.equals(initialsFamilyName)){
1491 return false;
1492 }
1493 if (fullGivenName.length == initialsGivenName.length){
1494 for (int i =0; i< fullGivenName.length ; i++){
1495 if (fullGivenName[i].length() == 0 //comma ending not allowed
1496 || initialsGivenName[i].length() != 2 //only K. or similar allowed
1497 || fullGivenName[i].length() < initialsGivenName[i].length() //fullFirstName must be longer than abbrev Name
1498 || !initialsGivenName[i].endsWith(".") //initials must end with "."
1499 || !fullGivenName[i].startsWith(initialsGivenName[i].replace(".", ""))){ //start with same letter
1500 if (fullGivenName[i].matches("(von|van|de|zu)") && fullGivenName[i].equals(initialsGivenName[i])){
1501 continue;
1502 }else{
1503 return false;
1504 }
1505 }
1506 }
1507 return true;
1508 }else{
1509 return false;
1510 }
1511 }
1512
1513 public Set<String> getObligatoryAttributes(boolean lowerCase, BerlinModelImportConfigurator config){
1514 Set<String> result = new HashSet<>();
1515 Class<ICdmImport>[] ioClassList = config.getIoClassList();
1516 result.addAll(Arrays.asList(unclearMappers));
1517 result.addAll(Arrays.asList(createdAndNotesAttributes));
1518 result.addAll(Arrays.asList(operationalAttributes));
1519 CdmIoMapping mapping = new CdmIoMapping();
1520 for (CdmAttributeMapperBase mapper : classMappers){
1521 mapping.addMapper(mapper);
1522 }
1523 result.addAll(mapping.getSourceAttributes());
1524 if (lowerCase){
1525 Set<String> lowerCaseResult = new HashSet<>();
1526 for (String str : result){
1527 if (str != null){lowerCaseResult.add(str.toLowerCase());}
1528 }
1529 result = lowerCaseResult;
1530 }
1531 return result;
1532 }
1533
1534 @Override
1535 protected boolean doCheck(BerlinModelImportState state){
1536 BerlinModelReferenceImportValidator validator = new BerlinModelReferenceImportValidator();
1537 return validator.validate(state, this);
1538 }
1539
1540 @Override
1541 protected boolean isIgnore(BerlinModelImportState state){
1542 return (state.getConfig().getDoReferences() == IImportConfigurator.DO_REFERENCES.NONE);
1543 }
1544
1545 }