Project

General

Profile

Download (8.67 KB) Statistics
| Branch: | Revision:
1
/**
2
* Copyright (C) 2007 EDIT
3
* European Distributed Institute of Taxonomy
4
* http://www.e-taxonomy.eu
5
*
6
* The contents of this file are subject to the Mozilla Public License Version 1.1
7
* See LICENSE.TXT at the top of this package for the full license terms.
8
*/
9
package eu.etaxonomy.cdm.io.pesi.faunaEuropaea;
10

    
11
import java.sql.ResultSet;
12
import java.sql.SQLException;
13
import java.util.Collection;
14
import java.util.Map;
15

    
16
import org.apache.commons.lang.StringUtils;
17
import org.apache.log4j.Logger;
18
import org.springframework.stereotype.Component;
19
import org.springframework.transaction.TransactionStatus;
20

    
21
import eu.etaxonomy.cdm.io.common.ICdmIO;
22
import eu.etaxonomy.cdm.io.common.ImportHelper;
23
import eu.etaxonomy.cdm.io.common.MapWrapper;
24
import eu.etaxonomy.cdm.io.common.Source;
25
import eu.etaxonomy.cdm.model.agent.Person;
26
import eu.etaxonomy.cdm.model.agent.Team;
27
import eu.etaxonomy.cdm.model.agent.TeamOrPersonBase;
28
import eu.etaxonomy.cdm.model.common.CdmBase;
29
import eu.etaxonomy.cdm.strategy.parser.NonViralNameParserImpl;
30

    
31

    
32
/**
33
 * @author a.babadshanjan
34
 * @created 12.05.2009
35
 * @version 1.0
36
 */
37
@Component
38
public class FaunaEuropaeaAuthorImport extends FaunaEuropaeaImportBase {
39

    
40
    /**
41
     *
42
     */
43
    private static final long serialVersionUID = 1L;
44

    
45
    private static final Logger logger = Logger.getLogger(FaunaEuropaeaAuthorImport.class);
46

    
47
	private static int modCount = 1000;
48
	private final static String authorSeparator = ", ";
49
	private final static String lastAuthorSeparator = " & ";
50
	 protected static String fWs = "\\s*";
51
	 protected static String oWs = "\\s+";
52
	 protected static String finalTeamSplitter = "(" + fWs + "(&)" + fWs + "|" + oWs + "et" + oWs + ")";
53
	protected static String notFinalTeamSplitter = "((?:" + fWs + "," + fWs + ")(?!([A-Z][\\.]))"+"|" + finalTeamSplitter + ")";
54
	protected static String test = "(,\\s(?![A-Z][.|\\s|$]))|" + finalTeamSplitter ;
55

    
56

    
57
	/* (non-Javadoc)
58
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doCheck(eu.etaxonomy.cdm.io.common.IImportConfigurator)
59
	 */
60
	@Override
61
	protected boolean doCheck(FaunaEuropaeaImportState state){
62
		boolean result = true;
63
		logger.warn("No checking for Authors not implemented");
64

    
65
		return result;
66
	}
67

    
68
	/* (non-Javadoc)
69
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#doInvoke(eu.etaxonomy.cdm.io.common.IImportConfigurator, eu.etaxonomy.cdm.api.application.CdmApplicationController, java.util.Map)
70
	 */
71
	@Override
72
	protected void doInvoke(FaunaEuropaeaImportState state){
73
		/*
74
		logger.warn("Start author doInvoke");
75
		ProfilerController.memorySnapshot();
76
		*/
77
		if (!state.getConfig().isDoAuthors()){
78
			return;
79
		}
80
		Map<String, MapWrapper<? extends CdmBase>> stores = state.getStores();
81
		MapWrapper<TeamOrPersonBase<?>> authorStore = (MapWrapper<TeamOrPersonBase<?>>)stores.get(ICdmIO.TEAM_STORE);
82
		TransactionStatus txStatus = null;
83

    
84
		FaunaEuropaeaImportConfigurator fauEuConfig = state.getConfig();
85
		Source source = fauEuConfig.getSource();
86

    
87
		String namespace = "AuthorTeam";
88

    
89
		if(logger.isInfoEnabled()) { logger.info("Start making Authors..."); }
90

    
91
		try {
92

    
93
			String strQuery =
94
				" SELECT *  " +
95
				" FROM author " ;
96
			ResultSet rs = source.getResultSet(strQuery) ;
97

    
98
			int i = 0;
99
			while (rs.next()) {
100

    
101
				if ((i++ % modCount) == 0 && i!= 1 ) {
102
					if(logger.isDebugEnabled()) {
103
						logger.debug("Authors retrieved: " + (i-1));
104
					}
105
				}
106

    
107
				int authorId = rs.getInt("aut_id");
108
				String authorName = rs.getString("aut_name");
109

    
110
				String auctWithNecRegEx = "\\bauct\\b\\.?.*\\bnec\\b\\.?.*";
111
				String necAuctRegEx = "\\bnec\\b\\.?.*\\bauct\\b\\.?.*";
112

    
113
				boolean auctWithNecFound = expressionMatches(auctWithNecRegEx, authorName);
114
				boolean necAuctFound = expressionMatches(necAuctRegEx, authorName);
115
				if (auctWithNecFound){
116
					logger.debug("authorName before auct nec string is removed" + authorName);
117
					authorName = authorName.substring(expressionEnd("nec\\.?", authorName)+1, authorName.length());
118
					logger.debug("authorName after auct nec string is removed" + authorName);
119
				}
120

    
121
				if (necAuctFound){
122
					logger.debug("authorName before nec auct string is removed" + authorName);
123
					authorName = authorName.substring(0, authorName.indexOf("nec")-1);
124
					logger.debug("authorName before nec auct string is removed" + authorName);
125
				}
126
				TeamOrPersonBase<?> author = null;
127

    
128
				try {
129
				    NonViralNameParserImpl parser = NonViralNameParserImpl.NewInstance();
130

    
131
			        if (StringUtils.isNotBlank(authorName)){
132
			            //author = parser.author(authorName);
133
			            author = this.parseNomAuthorString(authorName);
134
    			        ImportHelper.setOriginalSource(author, fauEuConfig.getSourceReference(), authorId, namespace);
135

    
136
    					if (!authorStore.containsId(authorId)) {
137
    						authorStore.put(authorId, author);
138
    						if (logger.isDebugEnabled()) { logger.debug("Stored author (" + authorId + ") " + authorName); }
139
    					} else {
140
    						logger.warn("Not imported author with duplicated aut_id (" + authorId + ") " + authorName);
141
    					}
142
			        }
143
				} catch (Exception e) {
144
					logger.warn("An exception occurred when creating author with id " + authorId + ". Author could not be saved." + e.getMessage());
145
				}
146
			}
147

    
148
			if(logger.isInfoEnabled()) { logger.info("Saving authors ..."); }
149

    
150
			txStatus = startTransaction();
151

    
152
			// save authors
153
			getAgentService().save((Collection)authorStore.objects());
154

    
155
			commitTransaction(txStatus);
156

    
157
			if(logger.isInfoEnabled()) { logger.info("End making authors ..."); }
158

    
159
			return;
160

    
161
		} catch (SQLException e) {
162
			logger.error("SQLException:" +  e);
163
			state.setUnsuccessfull();
164
		}
165
	}
166

    
167
	/* (non-Javadoc)
168
	 * @see eu.etaxonomy.cdm.io.common.CdmIoBase#isIgnore(eu.etaxonomy.cdm.io.common.IImportConfigurator)
169
	 */
170
	@Override
171
    protected boolean isIgnore(FaunaEuropaeaImportState state){
172
		return ! state.getConfig().isDoAuthors();
173
	}
174

    
175
	public static TeamOrPersonBase<?> parseAuthorStringOld(String authorName){
176
	    TeamOrPersonBase<?> author = null;
177
	    String[] teamMembers = authorName.split(authorSeparator);
178
        String lastMember;
179
        String[] lastMembers;
180
        Person teamMember;
181
        if (teamMembers.length>1){
182
            lastMember = teamMembers[teamMembers.length -1];
183
            lastMembers = lastMember.split(lastAuthorSeparator);
184
            teamMembers[teamMembers.length -1] = "";
185
            author = Team.NewInstance();
186
            for(String member:teamMembers){
187
                if (!member.equals("")){
188
                    teamMember = Person.NewInstance();
189
                    teamMember.setTitleCache(member, true);
190
                   ((Team)author).addTeamMember(teamMember);
191
                }
192
            }
193
            if (lastMembers != null){
194
                for(String member:lastMembers){
195
                   teamMember = Person.NewInstance();
196
                   teamMember.setTitleCache(member, true);
197
                   ((Team)author).addTeamMember(teamMember);
198
                }
199
            }
200

    
201
        } else {
202
            teamMembers = authorName.split(lastAuthorSeparator);
203
            if (teamMembers.length>1){
204
                author = Team.NewInstance();
205
                for(String member:teamMembers){
206
                  teamMember = Person.NewInstance();
207
                  teamMember.setTitleCache(member, true);
208
                  ((Team)author).addTeamMember(teamMember);
209

    
210
                }
211
            }else{
212
                author = Person.NewInstance();
213
                author.setTitleCache(authorName, true);
214
            }
215
        }
216
        author.getTitleCache();
217
        return author;
218
	}
219

    
220
    /**
221
     * @param refAuthor
222
     * @return
223
     */
224
    public static TeamOrPersonBase<?> parseNomAuthorString(String refAuthor) {
225
        TeamOrPersonBase<?> author = null;
226
        //possible strings: Lastname, A., Lastname B. & Lastname C.
227
        //Lastname A, Lastname B & Lastname
228
        //Lastname A Lastname B & Lastname C
229
        //Lastname, J & Lastname, L
230
        String[] teamMembers = refAuthor.split(test);
231

    
232
        String lastMember;
233

    
234
        Person teamMember;
235
        author = Team.NewInstance();
236
        if (teamMembers.length>1){
237
            for(String member:teamMembers){
238
                if (!member.trim().equals("")){
239
                    teamMember = Person.NewInstance();
240
                    teamMember.setTitleCache(member, true);
241
                   ((Team)author).addTeamMember(teamMember);
242
                }
243
            }
244
        }else{
245
            author = Person.NewInstance();
246
            author.setTitleCache(refAuthor, true);
247

    
248
        }
249
        author.getTitleCache();
250
        return author;
251
    }
252

    
253
}
(2-2/20)