improve fauna europaea author import
[cdmlib-apps.git] / cdm-pesi / src / main / java / eu / etaxonomy / cdm / io / pesi / faunaEuropaea / FaunaEuropaeaAuthorImport.java
1 /**
2 * Copyright (C) 2007 EDIT
3 * European Distributed Institute of Taxonomy
4 * http://www.e-taxonomy.eu
5 *
6 * The contents of this file are subject to the Mozilla Public License Version 1.1
7 * See LICENSE.TXT at the top of this package for the full license terms.
8 */
9 package eu.etaxonomy.cdm.io.pesi.faunaEuropaea;
10
11 import java.sql.ResultSet;
12 import java.sql.SQLException;
13 import java.util.Collection;
14 import java.util.Map;
15
16 import org.apache.log4j.Logger;
17 import org.springframework.stereotype.Component;
18 import org.springframework.transaction.TransactionStatus;
19
20 import eu.etaxonomy.cdm.io.common.ICdmIO;
21 import eu.etaxonomy.cdm.io.common.ImportHelper;
22 import eu.etaxonomy.cdm.io.common.MapWrapper;
23 import eu.etaxonomy.cdm.io.common.Source;
24 import eu.etaxonomy.cdm.model.agent.Person;
25 import eu.etaxonomy.cdm.model.agent.Team;
26 import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
27 import eu.etaxonomy.cdm.model.common.CdmBase;
28
29
30 /**
31 * @author a.babadshanjan
32 * @created 12.05.2009
33 * @version 1.0
34 */
35 @Component
36 public class FaunaEuropaeaAuthorImport extends FaunaEuropaeaImportBase {
37 private static final Logger logger = Logger.getLogger(FaunaEuropaeaAuthorImport.class);
38
39 private static int modCount = 1000;
40 private final static String authorSeparator = ", ";
41 private final static String lastAuthorSeparator = " & ";
42
43
44 /* (non-Javadoc)
45 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
46 */
47 @Override
48 protected boolean doCheck(FaunaEuropaeaImportState state){
49 boolean result = true;
50 logger.warn("No checking for Authors not implemented");
51
52 return result;
53 }
54
55 /* (non-Javadoc)
56 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
57 */
58 @Override
59 protected void doInvoke(FaunaEuropaeaImportState state){
60 /*
61 logger.warn("Start author doInvoke");
62 ProfilerController.memorySnapshot();
63 */
64 if (!state.getConfig().isDoAuthors()){
65 return;
66 }
67 Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
68 MapWrapper<TeamOrPersonBase<?>> authorStore = (MapWrapper<TeamOrPersonBase<?>>)stores.get(ICdmIO.TEAM_STORE);
69 TransactionStatus txStatus = null;
70
71 FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
72 Source source = fauEuConfig.getSource();
73
74 String namespace = "AuthorTeam";
75
76 if(logger.isInfoEnabled()) { logger.info("Start making Authors..."); }
77
78 try {
79
80 String strQuery =
81 " SELECT * " +
82 " FROM author " ;
83 ResultSet rs = source.getResultSet(strQuery) ;
84
85 int i = 0;
86 while (rs.next()) {
87
88 if ((i++ % modCount) == 0 && i!= 1 ) {
89 if(logger.isDebugEnabled()) {
90 logger.debug("Authors retrieved: " + (i-1));
91 }
92 }
93
94 int authorId = rs.getInt("aut_id");
95 String authorName = rs.getString("aut_name");
96
97 String auctWithNecRegEx = "\\bauct\\b\\.?.*\\bnec\\b\\.?.*";
98 String necAuctRegEx = "\\bnec\\b\\.?.*\\bauct\\b\\.?.*";
99
100 boolean auctWithNecFound = expressionMatches(auctWithNecRegEx, authorName);
101 boolean necAuctFound = expressionMatches(necAuctRegEx, authorName);
102 if (auctWithNecFound){
103 logger.debug("authorName before auct nec string is removed" + authorName);
104 authorName = authorName.substring(expressionEnd("nec\\.?", authorName)+1, authorName.length());
105 logger.debug("authorName after auct nec string is removed" + authorName);
106 }
107
108 if (necAuctFound){
109 logger.debug("authorName before nec auct string is removed" + authorName);
110 authorName = authorName.substring(0, authorName.indexOf("nec")-1);
111 logger.debug("authorName before nec auct string is removed" + authorName);
112 }
113 TeamOrPersonBase<?> author = null;
114
115 try {
116 author = parseAuthorString(authorName);
117 ImportHelper.setOriginalSource(author, fauEuConfig.getSourceReference(), authorId, namespace);
118
119 if (!authorStore.containsId(authorId)) {
120 authorStore.put(authorId, author);
121 if (logger.isDebugEnabled()) { logger.debug("Stored author (" + authorId + ") " + authorName); }
122 } else {
123 logger.warn("Not imported author with duplicated aut_id (" + authorId + ") " + authorName);
124 }
125 } catch (Exception e) {
126 logger.warn("An exception occurred when creating author with id " + authorId + ". Author could not be saved.");
127 }
128 }
129
130 if(logger.isInfoEnabled()) { logger.info("Saving authors ..."); }
131
132 txStatus = startTransaction();
133
134 // save authors
135 getAgentService().save((Collection)authorStore.objects());
136
137 commitTransaction(txStatus);
138
139 if(logger.isInfoEnabled()) { logger.info("End making authors ..."); }
140
141 return;
142
143 } catch (SQLException e) {
144 logger.error("SQLException:" + e);
145 state.setUnsuccessfull();
146 }
147 }
148
149 /* (non-Javadoc)
150 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
151 */
152 @Override
153 protected boolean isIgnore(FaunaEuropaeaImportState state){
154 return ! state.getConfig().isDoAuthors();
155 }
156
157 public static TeamOrPersonBase<?> parseAuthorString(String authorName){
158 TeamOrPersonBase<?> author = null;
159 String[] teamMembers = authorName.split(authorSeparator);
160 String lastMember;
161 String[] lastMembers;
162 Person teamMember;
163 if (teamMembers.length>1){
164 lastMember = teamMembers[teamMembers.length -1];
165 lastMembers = lastMember.split(lastAuthorSeparator);
166 teamMembers[teamMembers.length -1] = "";
167 author = Team.NewInstance();
168 for(String member:teamMembers){
169 if (!member.equals("")){
170 teamMember = Person.NewInstance();
171 teamMember.setTitleCache(member, true);
172 ((Team)author).addTeamMember(teamMember);
173 }
174 }
175 if (lastMembers != null){
176 for(String member:lastMembers){
177 teamMember = Person.NewInstance();
178 teamMember.setTitleCache(member, true);
179 ((Team)author).addTeamMember(teamMember);
180 }
181 }
182
183 } else {
184 teamMembers = authorName.split(lastAuthorSeparator);
185 if (teamMembers.length>1){
186 author = Team.NewInstance();
187 for(String member:teamMembers){
188 teamMember = Person.NewInstance();
189 teamMember.setTitleCache(member, true);
190 ((Team)author).addTeamMember(teamMember);
191
192 }
193 }else{
194 author = Person.NewInstance();
195 author.setTitleCache(authorName, true);
196 }
197 }
198 author.getTitleCache();
199 return author;
200 }
201
202 }