View Javadoc
1 /*** 2 * Created by IntelliJ IDEA. 3 * User: Lennart 4 * Date: 23-nov-2003 5 * Time: 14:12:56 6 */ 7 package comics; 8 9 import comics.core.ComicURLFactory; 10 import comics.core.ComicImage; 11 import comics.core.GregorianDayRoller; 12 import comics.core.ComicGrabberEngineImplementation; 13 import comics.interfaces.DayRoller; 14 import comics.interfaces.ComicGrabberEngine; 15 16 import java.util.*; 17 import java.text.SimpleDateFormat; 18 import java.text.ParseException; 19 import java.io.*; 20 import java.net.URL; 21 22 /* 23 * CVS information: 24 * 25 * $Revision: 1.1 $ 26 * $Date: 2003/12/03 23:06:02 $ 27 */ 28 29 /*** 30 * This class grabs a complete history of comics off the UComics website. 31 * 32 * @author Lennart Martens 33 */ 34 public class NewComicGrabber { 35 36 /*** 37 * The date converter. 38 */ 39 private static final SimpleDateFormat iSDF = new SimpleDateFormat("dd/MM/yyyy"); 40 41 /*** 42 * Main method is the entry point for the application. 43 * 44 * @param args String[] with the start-up args. 45 */ 46 public static void main(String[] args) { 47 // Check the start-up args. 48 if(args == null || (args.length != 2 && args.length != 3)) { 49 System.err.println("\n\nUsage:\n\tComicGrabber <ucomics_comic_abbreviation> <output_path> [startdate_in_dd/MM/yyyy]"); 50 System.err.println("\n\t\tOR\n"); 51 System.err.println("\tComicGrabber @<details_file> <output_path>"); 52 System.exit(1); 53 } 54 // Okay, collect the minimal arguments set. 55 String abbreviationOrFile = args[0]; 56 String output = args[1]; 57 Collection allComics = null; 58 // See if we're handling an input file or a single retrieval. 59 if(abbreviationOrFile.startsWith("@")) { 60 allComics = processFile(abbreviationOrFile.substring(1)); 61 } else { 62 // Single retrieval. 63 // See if there is a startdate. 64 String date = null; 65 if(args.length == 3) { 66 date = args[2]; 67 } 68 allComics = new ArrayList(1); 69 try { 70 allComics.add(new InnerComicSpecs(abbreviationOrFile, date, null)); 71 } catch(ParseException pe) { 72 System.err.println("\n\nUnable to parse the input: " + pe.getMessage() + "\n"); 73 System.exit(1); 74 } 75 } 76 77 // Now cycle through the Collection and retrieve each one. 78 //@TODO process each comic here! 79 Iterator iter = allComics.iterator(); 80 while(iter.hasNext()) { 81 InnerComicSpecs lInnerComicSpecs = (InnerComicSpecs)iter.next(); 82 // The day roller for this comic. 83 DayRoller roller = new GregorianDayRoller(lInnerComicSpecs.getStartDate(), lInnerComicSpecs.getDays()); 84 // The comic code. 85 String comic = lInnerComicSpecs.getCode(); 86 // Create a ComicGrabber. 87 ComicGrabberEngine engine = new ComicGrabberEngineImplementation(); 88 try { 89 // This variable is used to see when it's done grabbing. 90 int previousError = 0; 91 // This variable keeps track of the previous month. Each new month grabbed, gets a println. 92 int previousMonth = roller.getCurrentMonth(); 93 int previousYear = roller.getCurrentYear(); 94 // Total error count while retrieving. 95 int totalError = 0; 96 // Some useful output before starting. 97 System.out.println("\n############################################\n Retrieving '" + comic + "'\n Starting from " + roller.getFormattedCurrentDate("dd/MM/yyyy") + "\n############################################"); 98 // Keep cycling. 99 while(previousError < 14) { 100 // Each retrieval attempt has its own 'try-catch' block, to keep an isolated failure from 101 // wrecking the entire program. 102 try { 103 // We need the year separately in the retrieval process, and the month is used for printing 104 // statements. 105 int year = roller.getCurrentYear(); 106 int month = roller.getCurrentMonth(); 107 // Make sure the notation is zero-primed for single-digit numbers, eg. '08'. 108 String previousMonthString = ((previousMonth < 10)?"0":"") + Integer.toString(previousMonth); 109 String currentMonthString = ((month < 10)?"0":"") + Integer.toString(month); 110 // Some user-friendly statements. 111 if(month < previousMonth || year < previousYear) { 112 System.out.println("Grabbed " + previousMonthString + "/" + previousYear + " for comic '" + comic + "'."); 113 previousMonth = month; 114 previousYear = year; 115 } 116 // Create the comic URL. 117 URL url = ComicURLFactory.createURL(ComicURLFactory.UCOMICS, comic, roller.getCurrentDate()); 118 // The output dir will have a subdir with the comic abbreviation and a subdir for each year, and month within that year. 119 // They will (of course) only be created if they didn't already exist. 120 File outputDir = new File(output, comic + "/" + Integer.toString(year) + "/" + currentMonthString + "/"); 121 if(!outputDir.exists()) { 122 outputDir.mkdirs(); 123 } 124 // Retrieve the ComicImage. 125 ComicImage image = engine.grabComic(url); 126 // This is the outputfile proper. 127 File outputFile = new File(outputDir, image.getFilename()); 128 // See if the output file already exists. 129 // If it does, we shouldn't bother anymore; everything is updated. 130 if(outputFile.exists()) { 131 System.out.println("\n* Found comic '" + outputFile.getName() + "' in output path. Updating complete."); 132 break; 133 } else { 134 // Write the file. 135 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(outputFile)); 136 bos.write(image.getImage()); 137 bos.flush(); 138 bos.close(); 139 } 140 previousError = 0; 141 } catch(IOException ioe) { 142 System.err.println("Error for date " + roller.getFormattedCurrentDate("dd/MM/yyyy") + ": " + ioe.getMessage()); 143 previousError++; 144 totalError++; 145 } 146 roller.getPreviousDay(); 147 } 148 String last = roller.getFormattedCurrentDate("dd/MM/yyyy"); 149 if(previousError >= 14) { 150 System.out.println("\n\nExiting because the comic was not found for 15 consecutive days on the server."); 151 } else { 152 System.out.println("\n\nExiting because update was complete."); 153 } 154 System.out.println("\n - Last date: " + last + "\n - Total error count: " + totalError + "\n\n"); 155 } catch(Exception e) { 156 e.printStackTrace(); 157 } 158 } 159 } 160 161 162 private static Collection processFile(String aFilename) { 163 Collection result = new ArrayList(20); 164 try { 165 BufferedReader br = new BufferedReader(new FileReader(aFilename)); 166 String line = null; 167 while((line = br.readLine()) != null) { 168 // Trim line. 169 line = line.trim(); 170 // Skip empty lines and comment lines (starting with '#' or '!'). 171 if(line.equals("") || line.startsWith("#") || line.startsWith("!")) { 172 continue; 173 } else { 174 // Okay, we've got a potentially meaningful line. 175 // See if it makes sense. 176 StringTokenizer st = new StringTokenizer(line, " "); 177 int count = st.countTokens(); 178 if(count != 2 && count != 3) { 179 // Not good. 180 System.err.println("Unable to parse line '" + line + "'. Skipping it."); 181 } else { 182 // Let's crack this thing. 183 String comic = st.nextToken().trim(); 184 String days = st.nextToken().trim(); 185 String startdate = null; 186 if(count == 3) { 187 startdate = st.nextToken().trim(); 188 } 189 try { 190 result.add(new InnerComicSpecs(comic, days, startdate)); 191 } catch(ParseException pe) { 192 System.err.println("Unable to parse line '" + line + "': " + pe.getMessage() + ". Skipping it."); 193 } 194 } 195 } 196 } 197 br.close(); 198 } catch(IOException ioe) { 199 System.err.println("\n\nUnable to read the specified file '" + aFilename + "': " + ioe.getMessage() + "\n"); 200 System.exit(1); 201 } 202 return result; 203 } 204 205 /*** 206 * This class wraps the necessary specs for a comic. 207 */ 208 private static class InnerComicSpecs { 209 /*** 210 * The comics code. 211 */ 212 private String iCode = null; 213 /*** 214 * The desired start date. 215 */ 216 private Date iStartDate = null; 217 /*** 218 * The applicable days. 219 */ 220 private int[] iDays = null; 221 222 /*** 223 * The constructor parses all parts into the correct format for hanlding by the engine. 224 * 225 * @param aCode String with the comics code (eg. 'ga' for Garfield) 226 * @param aDays String with comma-separated three-letter day codes for the applicable days, 227 * or 'null' if all days are applicable. 228 * @param aStartDate String with the desired startdate. Format should be 'dd/MM/yyyy'. 229 * Can be 'null' when no start date is specified. 230 * @throws ParseException when one of the Strings could not be parsed. 231 */ 232 public InnerComicSpecs(String aCode, String aDays, String aStartDate) throws ParseException { 233 iCode = aCode; 234 iDays = this.parseDays(aDays); 235 iStartDate = this.parseDate(aStartDate); 236 } 237 238 /*** 239 * This method reports on the comic code. 240 * 241 * @return String with the comic code. 242 */ 243 public String getCode() { 244 return iCode; 245 } 246 247 /*** 248 * This method reports on the applicable days. 249 * 250 * @return int[] with the Calendar int day codes for the applicable days, 251 * or 'null' if all days are applicable. 252 */ 253 public int[] getDays() { 254 return iDays; 255 } 256 257 /*** 258 * This method reports on the desired start date 259 * 260 * @return Date with the desired start Date, or 'null' if none was specified. 261 */ 262 public Date getStartDate() { 263 return iStartDate; 264 } 265 266 /*** 267 * This method parses a String into a Date. 268 * 269 * @param aDateString String with the date to parse. Can be 'null', in which case 270 * 'null' will also be returned for the Date. 271 * @return Date with the corresponding Date object. Can be 'null' if the String passed in was 'null'. 272 * @throws ParseException when the String could not be parsed 273 */ 274 private Date parseDate(String aDateString) throws ParseException { 275 Date temp = null; 276 if(aDateString != null) { 277 try { 278 temp = iSDF.parse(aDateString); 279 } catch(ParseException pe) { 280 throw new ParseException("Unable to parse date '" + aDateString + "'. please format it as 'dd/MM/yyyy'.", 0); 281 } 282 } 283 return temp; 284 } 285 286 /*** 287 * This method takes a String with comma separated three-letter day codes and parses it to an array of ints 288 * where each int corrresponds to a day code as defined on the Calendar interface. 289 * 290 * @param aDaysString String with the comma separated three-letter day codes to parse. 291 * Can be 'null' in which case the returned int[] will also be 'null'. 292 * @return int[] with the corresponding codes from the Calendar for the applicable days. 293 * Can be 'null' if the String passed in was 'null'. 294 * @throws ParseException when the String could not be parsed 295 */ 296 private int[] parseDays(String aDaysString) throws ParseException { 297 int[] result = null; 298 StringTokenizer st = new StringTokenizer(aDaysString, ","); 299 int count = st.countTokens(); 300 result = new int[count]; 301 if(count > 1) { 302 int counter = 0; 303 while(st.hasMoreTokens()) { 304 result[counter] = this.parseDayCode(st.nextToken().trim()); 305 counter++; 306 } 307 } else if(count == 1) { 308 String temp = st.nextToken().trim(); 309 if(!temp.equalsIgnoreCase("all")) { 310 result[0] = this.parseDayCode(temp); 311 } else { 312 // All days selected. Set the applicable days to 'null'. 313 result = null; 314 } 315 } else { 316 throw new ParseException("No day codes found. Specify at least 'all'.", 0); 317 } 318 319 return result; 320 } 321 322 /*** 323 * This method parses a three-letter day code into the corresponding Calendar 324 * interface int code. 325 * 326 * @param aDayCode String with the three-letter day code 327 * @return int with the corresponding int from the Calendar interface 328 * @throws ParseException when the three-letter code could not be parsed 329 */ 330 private int parseDayCode(String aDayCode) throws ParseException { 331 int result = 0; 332 if(aDayCode.equalsIgnoreCase("mon")) { 333 result = Calendar.MONDAY; 334 } else if(aDayCode.equalsIgnoreCase("tue")) { 335 result = Calendar.TUESDAY; 336 } else if(aDayCode.equalsIgnoreCase("wed")) { 337 result = Calendar.WEDNESDAY; 338 } else if(aDayCode.equalsIgnoreCase("thu")) { 339 result = Calendar.THURSDAY; 340 } else if(aDayCode.equalsIgnoreCase("fri")) { 341 result = Calendar.FRIDAY; 342 } else if(aDayCode.equalsIgnoreCase("sat")) { 343 result = Calendar.SATURDAY; 344 } else if(aDayCode.equalsIgnoreCase("sun")) { 345 result = Calendar.SUNDAY; 346 } else { 347 throw new ParseException("Unrecognized three-letter day code '" + aDayCode + "'.", 0); 348 } 349 return result; 350 } 351 } 352 }

This page was automatically generated by Maven