1
|
/**
|
2
|
* Copyright (C) 2007 EDIT
|
3
|
* European Distributed Institute of Taxonomy
|
4
|
* http://www.e-taxonomy.eu
|
5
|
*
|
6
|
* The contents of this file are subject to the Mozilla Public License Version 1.1
|
7
|
* See LICENSE.TXT at the top of this package for the full license terms.
|
8
|
*/
|
9
|
package eu.etaxonomy.cdm.io.pesi.faunaEuropaea;
|
10
|
|
11
|
import java.sql.ResultSet;
|
12
|
import java.sql.SQLException;
|
13
|
import java.util.Collection;
|
14
|
import java.util.Map;
|
15
|
|
16
|
import org.apache.commons.lang.StringUtils;
|
17
|
import org.apache.log4j.Logger;
|
18
|
import org.springframework.stereotype.Component;
|
19
|
import org.springframework.transaction.TransactionStatus;
|
20
|
|
21
|
import eu.etaxonomy.cdm.io.common.ICdmIO;
|
22
|
import eu.etaxonomy.cdm.io.common.ImportHelper;
|
23
|
import eu.etaxonomy.cdm.io.common.MapWrapper;
|
24
|
import eu.etaxonomy.cdm.io.common.Source;
|
25
|
import eu.etaxonomy.cdm.model.agent.Person;
|
26
|
import eu.etaxonomy.cdm.model.agent.Team;
|
27
|
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
|
28
|
import eu.etaxonomy.cdm.model.common.CdmBase;
|
29
|
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
|
30
|
|
31
|
|
32
|
/**
|
33
|
* @author a.babadshanjan
|
34
|
* @since 12.05.2009
|
35
|
*/
|
36
|
@Component
|
37
|
public class FaunaEuropaeaAuthorImport extends FaunaEuropaeaImportBase {
|
38
|
|
39
|
private static final long serialVersionUID = 1L;
|
40
|
|
41
|
private static final Logger logger = Logger.getLogger(FaunaEuropaeaAuthorImport.class);
|
42
|
|
43
|
private static int modCount = 1000;
|
44
|
private final static String authorSeparator = ", ";
|
45
|
private final static String lastAuthorSeparator = " & ";
|
46
|
private static String capitalWord = "\\p{javaUpperCase}\\p{javaLowerCase}*";
|
47
|
protected static String fWs = "\\s*";
|
48
|
protected static String oWs = "\\s+";
|
49
|
protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")";
|
50
|
protected static String notFinalTeamSplitter = "((?:" + fWs + "," + fWs + ")(?!([A-Z][\\.]))"+"|" + finalTeamSplitter + ")";
|
51
|
protected static String test = "(, \\s(?!([A-Z].|\\s|$))|,$)" ;
|
52
|
//protected static String test = "((,\\s("+capitalWord+")+)|(,($|,?!(\\s))))";
|
53
|
|
54
|
@Override
|
55
|
protected boolean doCheck(FaunaEuropaeaImportState state){
|
56
|
boolean result = true;
|
57
|
logger.warn("No checking for Authors not implemented");
|
58
|
|
59
|
return result;
|
60
|
}
|
61
|
|
62
|
@Override
|
63
|
protected void doInvoke(FaunaEuropaeaImportState state){
|
64
|
/*
|
65
|
logger.warn("Start author doInvoke");
|
66
|
ProfilerController.memorySnapshot();
|
67
|
*/
|
68
|
if (!state.getConfig().isDoAuthors()){
|
69
|
return;
|
70
|
}
|
71
|
Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
|
72
|
MapWrapper<TeamOrPersonBase<?>> authorStore = (MapWrapper<TeamOrPersonBase<?>>)stores.get(ICdmIO.TEAM_STORE);
|
73
|
TransactionStatus txStatus = null;
|
74
|
|
75
|
FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
|
76
|
Source source = fauEuConfig.getSource();
|
77
|
|
78
|
String namespace = "AuthorTeam";
|
79
|
|
80
|
if(logger.isInfoEnabled()) { logger.info("Start making Authors..."); }
|
81
|
|
82
|
try {
|
83
|
|
84
|
String strQuery =
|
85
|
" SELECT * " +
|
86
|
" FROM author " ;
|
87
|
ResultSet rs = source.getResultSet(strQuery) ;
|
88
|
|
89
|
int i = 0;
|
90
|
while (rs.next()) {
|
91
|
|
92
|
if ((i++ % modCount) == 0 && i!= 1 ) {
|
93
|
if(logger.isDebugEnabled()) {
|
94
|
logger.debug("Authors retrieved: " + (i-1));
|
95
|
}
|
96
|
}
|
97
|
|
98
|
int authorId = rs.getInt("aut_id");
|
99
|
String authorName = rs.getString("aut_name");
|
100
|
|
101
|
String auctWithNecRegEx = "\\bauct\\b\\.?.*\\bnec\\b\\.?.*";
|
102
|
String necAuctRegEx = "\\bnec\\b\\.?.*\\bauct\\b\\.?.*";
|
103
|
|
104
|
boolean auctWithNecFound = expressionMatches(auctWithNecRegEx, authorName);
|
105
|
boolean necAuctFound = expressionMatches(necAuctRegEx, authorName);
|
106
|
if (auctWithNecFound){
|
107
|
logger.debug("authorName before auct nec string is removed" + authorName);
|
108
|
authorName = authorName.substring(expressionEnd("nec\\.?", authorName)+1, authorName.length());
|
109
|
logger.debug("authorName after auct nec string is removed" + authorName);
|
110
|
}
|
111
|
|
112
|
if (necAuctFound){
|
113
|
logger.debug("authorName before nec auct string is removed" + authorName);
|
114
|
authorName = authorName.substring(0, authorName.indexOf("nec")-1);
|
115
|
logger.debug("authorName before nec auct string is removed" + authorName);
|
116
|
}
|
117
|
TeamOrPersonBase<?> author = null;
|
118
|
|
119
|
try {
|
120
|
NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
|
121
|
|
122
|
if (StringUtils.isNotBlank(authorName)){
|
123
|
//author = parser.author(authorName);
|
124
|
author = this.parseNomAuthorString(authorName);
|
125
|
ImportHelper.setOriginalSource(author, fauEuConfig.getSourceReference(), authorId, namespace);
|
126
|
|
127
|
if (!authorStore.containsId(authorId)) {
|
128
|
authorStore.put(authorId, author);
|
129
|
if (logger.isDebugEnabled()) { logger.debug("Stored author (" + authorId + ") " + authorName); }
|
130
|
} else {
|
131
|
logger.warn("Not imported author with duplicated aut_id (" + authorId + ") " + authorName);
|
132
|
}
|
133
|
}
|
134
|
} catch (Exception e) {
|
135
|
logger.warn("An exception occurred when creating author with id " + authorId + ". Author could not be saved." + e.getMessage());
|
136
|
}
|
137
|
}
|
138
|
|
139
|
if(logger.isInfoEnabled()) { logger.info("Saving authors ..."); }
|
140
|
|
141
|
txStatus = startTransaction();
|
142
|
|
143
|
// save authors
|
144
|
getAgentService().save((Collection)authorStore.objects());
|
145
|
|
146
|
commitTransaction(txStatus);
|
147
|
|
148
|
if(logger.isInfoEnabled()) { logger.info("End making authors ..."); }
|
149
|
|
150
|
return;
|
151
|
|
152
|
} catch (SQLException e) {
|
153
|
logger.error("SQLException:" + e);
|
154
|
state.setUnsuccessfull();
|
155
|
}
|
156
|
}
|
157
|
|
158
|
@Override
|
159
|
protected boolean isIgnore(FaunaEuropaeaImportState state){
|
160
|
return ! state.getConfig().isDoAuthors();
|
161
|
}
|
162
|
|
163
|
public static TeamOrPersonBase<?> parseAuthorStringOld(String authorName){
|
164
|
TeamOrPersonBase<?> author = null;
|
165
|
String[] teamMembers = authorName.split(authorSeparator);
|
166
|
String lastMember;
|
167
|
String[] lastMembers;
|
168
|
Person teamMember;
|
169
|
if (teamMembers.length>1){
|
170
|
lastMember = teamMembers[teamMembers.length -1];
|
171
|
lastMembers = lastMember.split(lastAuthorSeparator);
|
172
|
teamMembers[teamMembers.length -1] = "";
|
173
|
author = Team.NewInstance();
|
174
|
for(String member:teamMembers){
|
175
|
if (!member.equals("")){
|
176
|
teamMember = Person.NewInstance();
|
177
|
teamMember.setTitleCache(member, true);
|
178
|
((Team)author).addTeamMember(teamMember);
|
179
|
}
|
180
|
}
|
181
|
if (lastMembers != null){
|
182
|
for(String member:lastMembers){
|
183
|
teamMember = Person.NewInstance();
|
184
|
teamMember.setTitleCache(member, true);
|
185
|
((Team)author).addTeamMember(teamMember);
|
186
|
}
|
187
|
}
|
188
|
|
189
|
} else {
|
190
|
teamMembers = authorName.split(lastAuthorSeparator);
|
191
|
if (teamMembers.length>1){
|
192
|
author = Team.NewInstance();
|
193
|
for(String member:teamMembers){
|
194
|
teamMember = Person.NewInstance();
|
195
|
teamMember.setTitleCache(member, true);
|
196
|
((Team)author).addTeamMember(teamMember);
|
197
|
|
198
|
}
|
199
|
}else{
|
200
|
author = Person.NewInstance();
|
201
|
author.setTitleCache(authorName, true);
|
202
|
}
|
203
|
}
|
204
|
author.getTitleCache();
|
205
|
return author;
|
206
|
}
|
207
|
|
208
|
/**
|
209
|
* @param refAuthor
|
210
|
* @return
|
211
|
*/
|
212
|
public static TeamOrPersonBase<?> parseNomAuthorString(String refAuthor) {
|
213
|
TeamOrPersonBase<?> author = null;
|
214
|
//possible strings: Lastname, A., Lastname B. & Lastname C.
|
215
|
//Lastname A, Lastname B & Lastname
|
216
|
//Lastname A Lastname B & Lastname C
|
217
|
//Lastname, J & Lastname, L
|
218
|
String[] firstTeamMembers = refAuthor.split(finalTeamSplitter);
|
219
|
String[] teamMembers = null;
|
220
|
String lastMember = null;
|
221
|
lastMember = firstTeamMembers[firstTeamMembers.length-1];
|
222
|
|
223
|
if (firstTeamMembers.length == 2){
|
224
|
teamMembers = firstTeamMembers[0].split(test);
|
225
|
}
|
226
|
Person teamMember;
|
227
|
author = Team.NewInstance();
|
228
|
if (teamMembers != null){
|
229
|
for(String member:teamMembers){
|
230
|
if (!member.trim().equals("")){
|
231
|
teamMember = Person.NewInstance();
|
232
|
teamMember.setTitleCache(member, true);
|
233
|
((Team)author).addTeamMember(teamMember);
|
234
|
}
|
235
|
}
|
236
|
teamMember = Person.NewInstance();
|
237
|
teamMember.setTitleCache(lastMember, true);
|
238
|
((Team)author).addTeamMember(teamMember);
|
239
|
|
240
|
}else{
|
241
|
teamMembers = lastMember.split(test);
|
242
|
if (teamMembers.length >1){
|
243
|
for(String member:teamMembers){
|
244
|
if (!member.trim().equals("")){
|
245
|
teamMember = Person.NewInstance();
|
246
|
teamMember.setTitleCache(member, true);
|
247
|
((Team)author).addTeamMember(teamMember);
|
248
|
}
|
249
|
}
|
250
|
}else{
|
251
|
author = Person.NewInstance();
|
252
|
author.setTitleCache(lastMember, true);
|
253
|
}
|
254
|
|
255
|
|
256
|
}
|
257
|
author.getTitleCache();
|
258
|
return author;
|
259
|
}
|
260
|
|
261
|
}
|