Project

General

Profile

Download (9.17 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.pesi.faunaEuropaea;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.Collection;
14
import java.util.Map;
15

    
16
import org.apache.commons.lang.StringUtils;
17
import org.apache.log4j.Logger;
18
import org.springframework.stereotype.Component;
19
import org.springframework.transaction.TransactionStatus;
20

    
21
import eu.etaxonomy.cdm.io.common.ICdmIO;
22
import eu.etaxonomy.cdm.io.common.ImportHelper;
23
import eu.etaxonomy.cdm.io.common.MapWrapper;
24
import eu.etaxonomy.cdm.io.common.Source;
25
import eu.etaxonomy.cdm.model.agent.Person;
26
import eu.etaxonomy.cdm.model.agent.Team;
27
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
30

    
31

    
32
/**
33
 * @author a.babadshanjan
34
 * @since 12.05.2009
35
 */
36
@Component
37
public class FaunaEuropaeaAuthorImport extends FaunaEuropaeaImportBase {
38

    
39
    private static final long serialVersionUID = 1L;
40

    
41
    private static final Logger logger = Logger.getLogger(FaunaEuropaeaAuthorImport.class);
42

    
43
	private static int modCount = 1000;
44
	private final static String authorSeparator = ", ";
45
	private final static String lastAuthorSeparator = " & ";
46
	private static String capitalWord = "\\p{javaUpperCase}\\p{javaLowerCase}*";
47
	 protected static String fWs = "\\s*";
48
	 protected static String oWs = "\\s+";
49
	 protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")";
50
	protected static String notFinalTeamSplitter = "((?:" + fWs + "," + fWs + ")(?!([A-Z][\\.]))"+"|" + finalTeamSplitter + ")";
51
	protected static String test = "(, \\s(?!([A-Z].|\\s|$))|,$)" ;
52
	//protected static String test = "((,\\s("+capitalWord+")+)|(,($|,?!(\\s))))";
53

    
54
	@Override
55
	protected boolean doCheck(FaunaEuropaeaImportState state){
56
		boolean result = true;
57
		logger.warn("No checking for Authors not implemented");
58

    
59
		return result;
60
	}
61

    
62
	@Override
63
	protected void doInvoke(FaunaEuropaeaImportState state){
64
		/*
65
		logger.warn("Start author doInvoke");
66
		ProfilerController.memorySnapshot();
67
		*/
68
		if (!state.getConfig().isDoAuthors()){
69
			return;
70
		}
71
		Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
72
		MapWrapper<TeamOrPersonBase<?>> authorStore = (MapWrapper<TeamOrPersonBase<?>>)stores.get(ICdmIO.TEAM_STORE);
73
		TransactionStatus txStatus = null;
74

    
75
		FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
76
		Source source = fauEuConfig.getSource();
77

    
78
		String namespace = "AuthorTeam";
79

    
80
		if(logger.isInfoEnabled()) { logger.info("Start making Authors..."); }
81

    
82
		try {
83

    
84
			String strQuery =
85
				" SELECT *  " +
86
				" FROM author " ;
87
			ResultSet rs = source.getResultSet(strQuery) ;
88

    
89
			int i = 0;
90
			while (rs.next()) {
91

    
92
				if ((i++ % modCount) == 0 && i!= 1 ) {
93
					if(logger.isDebugEnabled()) {
94
						logger.debug("Authors retrieved: " + (i-1));
95
					}
96
				}
97

    
98
				int authorId = rs.getInt("aut_id");
99
				String authorName = rs.getString("aut_name");
100

    
101
				String auctWithNecRegEx = "\\bauct\\b\\.?.*\\bnec\\b\\.?.*";
102
				String necAuctRegEx = "\\bnec\\b\\.?.*\\bauct\\b\\.?.*";
103

    
104
				boolean auctWithNecFound = expressionMatches(auctWithNecRegEx, authorName);
105
				boolean necAuctFound = expressionMatches(necAuctRegEx, authorName);
106
				if (auctWithNecFound){
107
					logger.debug("authorName before auct nec string is removed" + authorName);
108
					authorName = authorName.substring(expressionEnd("nec\\.?", authorName)+1, authorName.length());
109
					logger.debug("authorName after auct nec string is removed" + authorName);
110
				}
111

    
112
				if (necAuctFound){
113
					logger.debug("authorName before nec auct string is removed" + authorName);
114
					authorName = authorName.substring(0, authorName.indexOf("nec")-1);
115
					logger.debug("authorName before nec auct string is removed" + authorName);
116
				}
117
				TeamOrPersonBase<?> author = null;
118

    
119
				try {
120
				    NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
121

    
122
			        if (StringUtils.isNotBlank(authorName)){
123
			            //author = parser.author(authorName);
124
			            author = this.parseNomAuthorString(authorName);
125
    			        ImportHelper.setOriginalSource(author, fauEuConfig.getSourceReference(), authorId, namespace);
126

    
127
    					if (!authorStore.containsId(authorId)) {
128
    						authorStore.put(authorId, author);
129
    						if (logger.isDebugEnabled()) { logger.debug("Stored author (" + authorId + ") " + authorName); }
130
    					} else {
131
    						logger.warn("Not imported author with duplicated aut_id (" + authorId + ") " + authorName);
132
    					}
133
			        }
134
				} catch (Exception e) {
135
					logger.warn("An exception occurred when creating author with id " + authorId + ". Author could not be saved." + e.getMessage());
136
				}
137
			}
138

    
139
			if(logger.isInfoEnabled()) { logger.info("Saving authors ..."); }
140

    
141
			txStatus = startTransaction();
142

    
143
			// save authors
144
			getAgentService().save((Collection)authorStore.objects());
145

    
146
			commitTransaction(txStatus);
147

    
148
			if(logger.isInfoEnabled()) { logger.info("End making authors ..."); }
149

    
150
			return;
151

    
152
		} catch (SQLException e) {
153
			logger.error("SQLException:" +  e);
154
			state.setUnsuccessfull();
155
		}
156
	}
157

    
158
	@Override
159
    protected boolean isIgnore(FaunaEuropaeaImportState state){
160
		return ! state.getConfig().isDoAuthors();
161
	}
162

    
163
	public static TeamOrPersonBase<?> parseAuthorStringOld(String authorName){
164
	    TeamOrPersonBase<?> author = null;
165
	    String[] teamMembers = authorName.split(authorSeparator);
166
        String lastMember;
167
        String[] lastMembers;
168
        Person teamMember;
169
        if (teamMembers.length>1){
170
            lastMember = teamMembers[teamMembers.length -1];
171
            lastMembers = lastMember.split(lastAuthorSeparator);
172
            teamMembers[teamMembers.length -1] = "";
173
            author = Team.NewInstance();
174
            for(String member:teamMembers){
175
                if (!member.equals("")){
176
                    teamMember = Person.NewInstance();
177
                    teamMember.setTitleCache(member, true);
178
                   ((Team)author).addTeamMember(teamMember);
179
                }
180
            }
181
            if (lastMembers != null){
182
                for(String member:lastMembers){
183
                   teamMember = Person.NewInstance();
184
                   teamMember.setTitleCache(member, true);
185
                   ((Team)author).addTeamMember(teamMember);
186
                }
187
            }
188

    
189
        } else {
190
            teamMembers = authorName.split(lastAuthorSeparator);
191
            if (teamMembers.length>1){
192
                author = Team.NewInstance();
193
                for(String member:teamMembers){
194
                  teamMember = Person.NewInstance();
195
                  teamMember.setTitleCache(member, true);
196
                  ((Team)author).addTeamMember(teamMember);
197

    
198
                }
199
            }else{
200
                author = Person.NewInstance();
201
                author.setTitleCache(authorName, true);
202
            }
203
        }
204
        author.getTitleCache();
205
        return author;
206
	}
207

    
208
    /**
209
     * @param refAuthor
210
     * @return
211
     */
212
    public static TeamOrPersonBase<?> parseNomAuthorString(String refAuthor) {
213
        TeamOrPersonBase<?> author = null;
214
        //possible strings: Lastname, A., Lastname B. & Lastname C.
215
        //Lastname A, Lastname B & Lastname
216
        //Lastname A Lastname B & Lastname C
217
        //Lastname, J & Lastname, L
218
        String[] firstTeamMembers = refAuthor.split(finalTeamSplitter);
219
        String[] teamMembers = null;
220
        String lastMember = null;
221
        lastMember = firstTeamMembers[firstTeamMembers.length-1];
222

    
223
        if (firstTeamMembers.length == 2){
224
            teamMembers = firstTeamMembers[0].split(test);
225
        }
226
        Person teamMember;
227
        author = Team.NewInstance();
228
        if (teamMembers != null){
229
            for(String member:teamMembers){
230
                if (!member.trim().equals("")){
231
                    teamMember = Person.NewInstance();
232
                    teamMember.setTitleCache(member, true);
233
                   ((Team)author).addTeamMember(teamMember);
234
                }
235
            }
236
            teamMember = Person.NewInstance();
237
            teamMember.setTitleCache(lastMember, true);
238
            ((Team)author).addTeamMember(teamMember);
239

    
240
        }else{
241
            teamMembers = lastMember.split(test);
242
            if (teamMembers.length >1){
243
                for(String member:teamMembers){
244
                    if (!member.trim().equals("")){
245
                        teamMember = Person.NewInstance();
246
                        teamMember.setTitleCache(member, true);
247
                       ((Team)author).addTeamMember(teamMember);
248
                    }
249
                }
250
            }else{
251
                author = Person.NewInstance();
252
                author.setTitleCache(lastMember, true);
253
            }
254

    
255

    
256
        }
257
        author.getTitleCache();
258
        return author;
259
    }
260

    
261
}
(2-2/20)