View Javadoc

1   /***
2    Copyright (C) 2005 The Java Community
3   
4    This program is free software; you can redistribute it and/or modify  it under
5    the terms of the GNU General Public License as published by  the Free Software
6    Foundation; either version 2 of the License, or  (at your option) any later
7    version.
8   
9    This program is distributed in the hope that it will be useful,  but WITHOUT
10   ANY WARRANTY; without even the implied warranty of  MERCHANTABILITY or FITNESS
11   FOR A PARTICULAR PURPOSE. See the  GNU General Public License for more details.
12  
13   You should have received a copy of the GNU General Public License  along with
14   this program; if not, write to the Free Software  Foundation, Inc., 59 Temple
15   Place, Suite 330, Boston, MA 02111-1307 USA.
16   */
17  package org.bejug.javacareers.feeder.parser;
18  
19  import java.net.MalformedURLException;
20  import java.net.URL;
21  import java.util.Calendar;
22  import java.util.Iterator;
23  import java.util.LinkedList;
24  
25  import org.apache.commons.logging.Log;
26  import org.apache.commons.logging.LogFactory;
27  import org.bejug.javacareers.feeder.FeederException;
28  import org.bejug.javacareers.feeder.model.JobList;
29  import org.bejug.javacareers.feeder.model.RssFeed;
30  import org.bejug.javacareers.jobs.model.JobOffer;
31  import org.gnu.stealthp.rsslib.RSSChannel;
32  import org.gnu.stealthp.rsslib.RSSException;
33  import org.gnu.stealthp.rsslib.RSSHandler;
34  import org.gnu.stealthp.rsslib.RSSItem;
35  import org.gnu.stealthp.rsslib.RSSParser;
36  /***
37   * Reads and parses a RSS file
38   *
39   * @author Bavo (last modified by $Author: bavo_jcs $
40   * @version $Revision: 1.3 $ - $Date: 2005/09/30 14:38:08 $
41   */
42  public class RssFeedParser implements FeedParser {
43  
44      /***
45       * The RssFeedParser logger.
46       */
47      private static final Log LOG = LogFactory.getLog(RssFeedParser.class);
48  
49      /***
50       * @param feed RssFeed to parse
51       * @return list of JobEntry objects from this feed
52       * @throws FeederException if an error
53       */
54      public JobList parseFeed(RssFeed feed) throws FeederException {
55          JobList list = null;
56          try {
57              list = new JobList(feed.getUri());
58              processRSS(feed, list);
59          }
60          catch (NullPointerException e) {
61              LOG.error(e);
62              throw new FeederException("Nullpointer: "+e);
63          }
64          catch (FeederException e) {
65              LOG.error(e);
66              throw new FeederException(e);
67          }
68          return list;
69      }
70  
71      /***
72       * @param feed    feed to scan
73       * @param list   List to add the jobs to
74       * @throws FeederException if an error
75       */
76      private void processRSS(RssFeed feed, JobList list)
77              throws FeederException {
78          String uri = feed.getUri();
79          String sourceUrl = feed.getSourceUrl();
80          String sourceName = feed.getSourceName();
81          URL url = null;
82          try {
83              url = new URL(uri);
84          } catch (MalformedURLException e) {
85              LOG.error(e);
86              throw new FeederException(
87                      new IllegalArgumentException("URL invalid: "+url));
88          }
89  
90  
91          LOG.info("Debug: Creating handler");
92          RSSHandler handler = new RSSHandler();
93          try {
94              LOG.info("Debug: Trying parse");
95              RSSParser.parseXmlFile(url, handler, false);
96              LOG.info("Debug: Parse done");
97          } catch (RSSException e) {
98              LOG.error(e);
99              throw new FeederException(e);
100         }
101 
102         LOG.info("Debug: Getting channel");
103         RSSChannel channel = handler.getRSSChannel();
104 
105         String copyright = channel.getCopyright();
106         LOG.info("Debug: copyright = " + copyright);
107         String webmaster = channel.getWebMaster();
108         LOG.info("Debug: webmaster = " + webmaster);
109         String build = channel.getLastBuildDate();
110         LOG.info("Debug: build = " + build);
111         String language = channel.getLanguage();
112         LOG.info("Debug: language = " + language);
113         String desc = channel.getDescription();
114         LOG.info("Debug: desc = " + desc);
115         String about = channel.getAboutAttribute();
116         LOG.info("Debug: about = " + about);
117         String title = channel.getTitle();
118         LOG.info("Debug: title = " + title);
119         String link = channel.getLink();
120         LOG.info("Debug: link = " + link);
121         LinkedList items = channel.getItems();
122         Iterator iter = items.iterator();
123 
124         while (iter.hasNext()) {
125             RSSItem item = (RSSItem) iter.next();
126             String itemtitle = item.getTitle();
127             String text = item.getDescription();
128             String itemlink = item.getLink();
129 
130             JobOffer entry = new JobOffer();
131             entry.setTitle(itemtitle);
132             entry.setDescription(text);
133 
134             entry.setSourceUrl(sourceUrl);
135             entry.setSourceName(sourceName);
136 
137             entry.setPublicationDate(Calendar.getInstance().getTime());
138             entry.setUrl(itemlink);
139             entry.setLocation("Rss Feed");
140             entry.setFeedUrl(uri);
141 
142             LOG.info("Debug: link = " + itemlink);
143             LOG.info("Debug: title = " + itemtitle);
144             LOG.info("Debug: sourceName:"+sourceName);
145 
146             list.addJobOffer(entry);
147         }
148     }
149 }
150 
151 /***
152  * $Log: RssFeedParser.java,v $
153  * Revision 1.3  2005/09/30 14:38:08  bavo_jcs
154  * Fixed URL
155  *
156  * Revision 1.2  2005/09/13 08:11:17  schauwvliege
157  * organize imports
158  *
159  * Revision 1.1  2005/08/26 07:58:29  ge0ffrey
160  * split up the sources in service, serviceimpl and webclient
161  *
162  * Revision 1.7  2005/08/16 09:09:19  bavo_jcs
163  * Replaced Log4j usage with Commons Logging
164  *
165  * Revision 1.6  2005/08/10 09:04:48  bavo_jcs
166  * Optimized imports according to checkstyle
167  *
168  * Revision 1.5  2005/08/09 12:59:54  bavo_jcs
169  * Optimized imports
170  *
171  * Revision 1.4  2005/06/14 12:05:52  schauwvliege
172  * CheckStyle and fixing tests
173  *
174  * Revision 1.3  2005/06/13 13:04:54  bavo_jcs
175  * delete by URL fix
176  *
177  * Revision 1.2  2005/06/09 08:18:43  bejug_cc
178  * Fix initial import
179  *
180  * Revision 1.11  2005/06/06 15:47:24  bbr
181  * job source
182  *
183  * Revision 1.10  2005/06/06 14:13:23  bbr
184  * lowercased context files
185  *
186  * Revision 1.9  2005/06/06 13:51:54  bbr
187  * extracted feeder from test
188  *
189  * Revision 1.8  2005/06/05 15:20:24  sja
190  * Changed sourceName content to link
191  *
192  * Revision 1.7  2005/06/05 14:16:50  sja
193  * Added sourceUrl and sourceName.
194  *
195  * Revision 1.6  2005/06/03 09:44:09  bbr
196  * admin feed panel work
197  *
198  * Revision 1.5  2005/06/02 15:52:41  PSONG09
199  * modified location of rss feed user
200  *
201  * Revision 1.3  2005/05/31 11:56:19  bbr
202  * deleted JobEntry
203  *
204  * Revision 1.2  2005/05/24 11:52:39  bbr
205  * Using spring sheduling
206  *
207  * Revision 1.1  2005/05/23 17:04:57  sja
208  * Moved to org.bejug.javacareers.feeder package.
209  *
210  * Revision 1.1  2005/05/23 08:46:33  PSONG09
211  * added feeder source files to project
212  *
213  * Revision 1.3  2005/05/23 07:10:54  stephan_janssen
214  * Code cleanup.
215  *
216  * Revision 1.2  2005/05/19 15:50:51  stephan_janssen
217  * Made LOG final static.
218  *
219  * Revision 1.1  2005/05/11 11:53:25  bavo_jcs
220  * refactored
221  * - conform to conventions
222  * - some javadoc
223  * - Added FeederTask design
224  *
225  * Revision 1.5  2005/05/10 16:03:59  bavo_jcs
226  * cleanup
227  *
228  * Revision 1.4  2005/05/10 11:32:49  bavo_jcs
229  * integrated with services from JavaCareers Web
230  *
231  * Revision 1.3  2005/05/03 13:46:07  bavo_jcs
232  * Added mockups
233  *
234  * Revision 1.2  2005/05/02 15:37:38  bavo_jcs
235  * Added Javadocs
236  *
237  * Revision 1.1.1.1  2005/04/26 14:13:50  stephan_janssen
238  * Initial import
239  *
240  * Revision 1.1.1.1  2005/04/26 12:58:32  sja
241  * Initial Release
242  *
243  * Revision 1.1.1.1  2005/04/26 12:51:27  sja
244  * Initial Release
245  *
246  */