1 /***
2 Copyright (C) 2005 The Java Community
3
4 This program is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free Software
6 Foundation; either version 2 of the License, or (at your option) any later
7 version.
8
9 This program is distributed in the hope that it will be useful, but WITHOUT
10 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License along with
14 this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 Place, Suite 330, Boston, MA 02111-1307 USA.
16 */
17 package org.bejug.javacareers.feeder.parser;
18
19 import java.net.MalformedURLException;
20 import java.net.URL;
21 import java.util.Calendar;
22 import java.util.Iterator;
23 import java.util.LinkedList;
24
25 import org.apache.commons.logging.Log;
26 import org.apache.commons.logging.LogFactory;
27 import org.bejug.javacareers.feeder.FeederException;
28 import org.bejug.javacareers.feeder.model.JobList;
29 import org.bejug.javacareers.feeder.model.RssFeed;
30 import org.bejug.javacareers.jobs.model.JobOffer;
31 import org.gnu.stealthp.rsslib.RSSChannel;
32 import org.gnu.stealthp.rsslib.RSSException;
33 import org.gnu.stealthp.rsslib.RSSHandler;
34 import org.gnu.stealthp.rsslib.RSSItem;
35 import org.gnu.stealthp.rsslib.RSSParser;
36 /***
37 * Reads and parses a RSS file
38 *
39 * @author Bavo (last modified by $Author: bavo_jcs $
40 * @version $Revision: 1.3 $ - $Date: 2005/09/30 14:38:08 $
41 */
42 public class RssFeedParser implements FeedParser {
43
44 /***
45 * The RssFeedParser logger.
46 */
47 private static final Log LOG = LogFactory.getLog(RssFeedParser.class);
48
49 /***
50 * @param feed RssFeed to parse
51 * @return list of JobEntry objects from this feed
52 * @throws FeederException if an error
53 */
54 public JobList parseFeed(RssFeed feed) throws FeederException {
55 JobList list = null;
56 try {
57 list = new JobList(feed.getUri());
58 processRSS(feed, list);
59 }
60 catch (NullPointerException e) {
61 LOG.error(e);
62 throw new FeederException("Nullpointer: "+e);
63 }
64 catch (FeederException e) {
65 LOG.error(e);
66 throw new FeederException(e);
67 }
68 return list;
69 }
70
71 /***
72 * @param feed feed to scan
73 * @param list List to add the jobs to
74 * @throws FeederException if an error
75 */
76 private void processRSS(RssFeed feed, JobList list)
77 throws FeederException {
78 String uri = feed.getUri();
79 String sourceUrl = feed.getSourceUrl();
80 String sourceName = feed.getSourceName();
81 URL url = null;
82 try {
83 url = new URL(uri);
84 } catch (MalformedURLException e) {
85 LOG.error(e);
86 throw new FeederException(
87 new IllegalArgumentException("URL invalid: "+url));
88 }
89
90
91 LOG.info("Debug: Creating handler");
92 RSSHandler handler = new RSSHandler();
93 try {
94 LOG.info("Debug: Trying parse");
95 RSSParser.parseXmlFile(url, handler, false);
96 LOG.info("Debug: Parse done");
97 } catch (RSSException e) {
98 LOG.error(e);
99 throw new FeederException(e);
100 }
101
102 LOG.info("Debug: Getting channel");
103 RSSChannel channel = handler.getRSSChannel();
104
105 String copyright = channel.getCopyright();
106 LOG.info("Debug: copyright = " + copyright);
107 String webmaster = channel.getWebMaster();
108 LOG.info("Debug: webmaster = " + webmaster);
109 String build = channel.getLastBuildDate();
110 LOG.info("Debug: build = " + build);
111 String language = channel.getLanguage();
112 LOG.info("Debug: language = " + language);
113 String desc = channel.getDescription();
114 LOG.info("Debug: desc = " + desc);
115 String about = channel.getAboutAttribute();
116 LOG.info("Debug: about = " + about);
117 String title = channel.getTitle();
118 LOG.info("Debug: title = " + title);
119 String link = channel.getLink();
120 LOG.info("Debug: link = " + link);
121 LinkedList items = channel.getItems();
122 Iterator iter = items.iterator();
123
124 while (iter.hasNext()) {
125 RSSItem item = (RSSItem) iter.next();
126 String itemtitle = item.getTitle();
127 String text = item.getDescription();
128 String itemlink = item.getLink();
129
130 JobOffer entry = new JobOffer();
131 entry.setTitle(itemtitle);
132 entry.setDescription(text);
133
134 entry.setSourceUrl(sourceUrl);
135 entry.setSourceName(sourceName);
136
137 entry.setPublicationDate(Calendar.getInstance().getTime());
138 entry.setUrl(itemlink);
139 entry.setLocation("Rss Feed");
140 entry.setFeedUrl(uri);
141
142 LOG.info("Debug: link = " + itemlink);
143 LOG.info("Debug: title = " + itemtitle);
144 LOG.info("Debug: sourceName:"+sourceName);
145
146 list.addJobOffer(entry);
147 }
148 }
149 }
150
151 /***
152 * $Log: RssFeedParser.java,v $
153 * Revision 1.3 2005/09/30 14:38:08 bavo_jcs
154 * Fixed URL
155 *
156 * Revision 1.2 2005/09/13 08:11:17 schauwvliege
157 * organize imports
158 *
159 * Revision 1.1 2005/08/26 07:58:29 ge0ffrey
160 * split up the sources in service, serviceimpl and webclient
161 *
162 * Revision 1.7 2005/08/16 09:09:19 bavo_jcs
163 * Replaced Log4j usage with Commons Logging
164 *
165 * Revision 1.6 2005/08/10 09:04:48 bavo_jcs
166 * Optimized imports according to checkstyle
167 *
168 * Revision 1.5 2005/08/09 12:59:54 bavo_jcs
169 * Optimized imports
170 *
171 * Revision 1.4 2005/06/14 12:05:52 schauwvliege
172 * CheckStyle and fixing tests
173 *
174 * Revision 1.3 2005/06/13 13:04:54 bavo_jcs
175 * delete by URL fix
176 *
177 * Revision 1.2 2005/06/09 08:18:43 bejug_cc
178 * Fix initial import
179 *
180 * Revision 1.11 2005/06/06 15:47:24 bbr
181 * job source
182 *
183 * Revision 1.10 2005/06/06 14:13:23 bbr
184 * lowercased context files
185 *
186 * Revision 1.9 2005/06/06 13:51:54 bbr
187 * extracted feeder from test
188 *
189 * Revision 1.8 2005/06/05 15:20:24 sja
190 * Changed sourceName content to link
191 *
192 * Revision 1.7 2005/06/05 14:16:50 sja
193 * Added sourceUrl and sourceName.
194 *
195 * Revision 1.6 2005/06/03 09:44:09 bbr
196 * admin feed panel work
197 *
198 * Revision 1.5 2005/06/02 15:52:41 PSONG09
199 * modified location of rss feed user
200 *
201 * Revision 1.3 2005/05/31 11:56:19 bbr
202 * deleted JobEntry
203 *
204 * Revision 1.2 2005/05/24 11:52:39 bbr
205 * Using spring sheduling
206 *
207 * Revision 1.1 2005/05/23 17:04:57 sja
208 * Moved to org.bejug.javacareers.feeder package.
209 *
210 * Revision 1.1 2005/05/23 08:46:33 PSONG09
211 * added feeder source files to project
212 *
213 * Revision 1.3 2005/05/23 07:10:54 stephan_janssen
214 * Code cleanup.
215 *
216 * Revision 1.2 2005/05/19 15:50:51 stephan_janssen
217 * Made LOG final static.
218 *
219 * Revision 1.1 2005/05/11 11:53:25 bavo_jcs
220 * refactored
221 * - conform to conventions
222 * - some javadoc
223 * - Added FeederTask design
224 *
225 * Revision 1.5 2005/05/10 16:03:59 bavo_jcs
226 * cleanup
227 *
228 * Revision 1.4 2005/05/10 11:32:49 bavo_jcs
229 * integrated with services from JavaCareers Web
230 *
231 * Revision 1.3 2005/05/03 13:46:07 bavo_jcs
232 * Added mockups
233 *
234 * Revision 1.2 2005/05/02 15:37:38 bavo_jcs
235 * Added Javadocs
236 *
237 * Revision 1.1.1.1 2005/04/26 14:13:50 stephan_janssen
238 * Initial import
239 *
240 * Revision 1.1.1.1 2005/04/26 12:58:32 sja
241 * Initial Release
242 *
243 * Revision 1.1.1.1 2005/04/26 12:51:27 sja
244 * Initial Release
245 *
246 */