split head parsing into coarse and fine

git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@96 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
andreas.schildbach 2010-08-22 00:21:09 +00:00
parent 75a7692e7f
commit 4b1f0bb5e7

View file

@ -359,44 +359,75 @@ public class SbbProvider implements NetworkProvider
return uri.toString();
}
private static final Pattern P_DEPARTURES_HEAD = Pattern.compile(".*?<p class=\"qs\">\n?" //
+ "<b>(.*?)</b><br />\n?"//
+ "Abfahrt (\\d+:\\d+)\n?"//
+ "Uhr, (\\d+\\.\\d+\\.\\d+)\n?"//
+ "</p>.*", Pattern.DOTALL);
private static final Pattern P_DEPARTURES_COARSE = Pattern.compile("<p class=\"sq\">(.+?)</p>", Pattern.DOTALL);
private static final Pattern P_DEPARTURES_FINE = Pattern.compile(".*?<b>(.*?)</b>\n?" //
+ "&gt;&gt;\n?" //
+ "(.*?)\n?" //
+ "<br />\n?" //
+ "<b>(\\d+:\\d+)</b>.*", Pattern.DOTALL);
private static final Pattern P_DEPARTURES_URI_STATION_ID = Pattern.compile("input=(\\d+)");
private static final Pattern P_DEPARTURES_HEAD_COARSE = Pattern.compile(".*?" //
+ "(?:" //
+ "<p class=\"qs\">\n(.+?)\n</p>.*?" //
+ "(?:(.+)|(an dieser Haltestelle keines)).*?" //
+ "<p class=\"links\">\n(.+?)\n</p>" //
+ "|(Informationen zu))" //
+ ".*?" //
, Pattern.DOTALL);
private static final Pattern P_DEPARTURES_HEAD_FINE = Pattern.compile("" //
+ "<b>(.*?)</b><br />\n" // location
+ "Abfahrt (\\d+:\\d+)\n" // time
+ "Uhr, (\\d{2}\\.\\d{2}\\.\\d{2}).*?" // date
+ "input=(\\d+).*?" // locationId
, Pattern.DOTALL);
private static final Pattern P_DEPARTURES_COARSE = Pattern.compile("<p class=\"sq\">\n(.+?)\n</p>", Pattern.DOTALL);
private static final Pattern P_DEPARTURES_FINE = Pattern.compile("" //
+ "<b>(.*?)</b>\n" // line
+ "&gt;&gt;\n" //
+ "(.*?)\n" // destination
+ "<br />\n" //
+ "<b>(\\d+:\\d+)</b>.*?" // time
, Pattern.DOTALL);
public QueryDeparturesResult queryDepartures(final String uri) throws IOException
{
final CharSequence page = ParserUtils.scrape(uri);
final Matcher mStationId = P_DEPARTURES_URI_STATION_ID.matcher(uri);
if (!mStationId.find())
throw new IllegalStateException(uri);
final int stationId = Integer.parseInt(mStationId.group(1));
// parse page
final Matcher mHead = P_DEPARTURES_HEAD.matcher(page);
if (mHead.matches())
final Matcher mHeadCoarse = P_DEPARTURES_HEAD_COARSE.matcher(page);
if (mHeadCoarse.matches())
{
final String location = ParserUtils.resolveEntities(mHead.group(1));
final Date currentTime = ParserUtils.joinDateTime(ParserUtils.parseDate(mHead.group(3)), ParserUtils.parseTime(mHead.group(2)));
// messages
if (mHeadCoarse.group(3) != null)
return new QueryDeparturesResult(uri, Status.NO_INFO);
else if (mHeadCoarse.group(5) != null)
return new QueryDeparturesResult(uri, Status.INVALID_STATION);
final String c = mHeadCoarse.group(1) + mHeadCoarse.group(4);
final Matcher mHeadFine = P_DEPARTURES_HEAD_FINE.matcher(c);
if (mHeadFine.matches())
{
final String location = ParserUtils.resolveEntities(mHeadFine.group(1));
final Date currentTime = ParserUtils.joinDateTime(ParserUtils.parseDate(mHeadFine.group(3)), ParserUtils
.parseTime(mHeadFine.group(2)));
final int locationId = Integer.parseInt(mHeadFine.group(4));
final List<Departure> departures = new ArrayList<Departure>(8);
// choose matcher
final Matcher mDepCoarse = P_DEPARTURES_COARSE.matcher(page);
final Matcher mDepCoarse = P_DEPARTURES_COARSE.matcher(mHeadCoarse.group(2));
while (mDepCoarse.find())
{
final Matcher mDepFine = P_DEPARTURES_FINE.matcher(mDepCoarse.group(1));
if (mDepFine.matches())
{
final Departure dep = parseDeparture(mDepFine, currentTime);
final String line = normalizeLine(ParserUtils.resolveEntities(mDepFine.group(1)));
final String destination = ParserUtils.resolveEntities(mDepFine.group(2));
final Calendar current = new GregorianCalendar();
current.setTime(currentTime);
final Calendar parsed = new GregorianCalendar();
parsed.setTime(ParserUtils.parseTime(mDepFine.group(3)));
parsed.set(Calendar.YEAR, current.get(Calendar.YEAR));
parsed.set(Calendar.MONTH, current.get(Calendar.MONTH));
parsed.set(Calendar.DAY_OF_MONTH, current.get(Calendar.DAY_OF_MONTH));
if (ParserUtils.timeDiff(parsed.getTime(), currentTime) < -PARSER_DAY_ROLLOVER_THRESHOLD_MS)
parsed.add(Calendar.DAY_OF_MONTH, 1);
final Departure dep = new Departure(parsed.getTime(), line, line != null ? LINES.get(line.charAt(0)) : null, 0, destination);
if (!departures.contains(dep))
departures.add(dep);
}
@ -406,34 +437,17 @@ public class SbbProvider implements NetworkProvider
}
}
return new QueryDeparturesResult(uri, stationId, location, currentTime, departures);
return new QueryDeparturesResult(uri, locationId, location, currentTime, departures);
}
else
{
return new QueryDeparturesResult(uri, Status.NO_INFO);
throw new IllegalArgumentException("cannot parse '" + c + "' on " + uri);
}
}
private static Departure parseDeparture(final Matcher mDep, final Date currentTime)
else
{
// line
final String line = normalizeLine(ParserUtils.resolveEntities(mDep.group(1)));
// destination
final String destination = ParserUtils.resolveEntities(mDep.group(2));
// time
final Calendar current = new GregorianCalendar();
current.setTime(currentTime);
final Calendar parsed = new GregorianCalendar();
parsed.setTime(ParserUtils.parseTime(mDep.group(3)));
parsed.set(Calendar.YEAR, current.get(Calendar.YEAR));
parsed.set(Calendar.MONTH, current.get(Calendar.MONTH));
parsed.set(Calendar.DAY_OF_MONTH, current.get(Calendar.DAY_OF_MONTH));
if (ParserUtils.timeDiff(parsed.getTime(), currentTime) < -PARSER_DAY_ROLLOVER_THRESHOLD_MS)
parsed.add(Calendar.DAY_OF_MONTH, 1);
return new Departure(parsed.getTime(), line, line != null ? LINES.get(line.charAt(0)) : null, 0, destination);
throw new IllegalArgumentException("cannot parse '" + page + "' on " + uri);
}
}
private static final Pattern P_NORMALIZE_LINE = Pattern.compile("([A-Za-zÄÖÜäöüß]+)[\\s-]*(.*)");
@ -471,7 +485,7 @@ public class SbbProvider implements NetworkProvider
throw new IllegalStateException("cannot normalize line " + line);
}
private static final Pattern P_NORMALIZE_TYPE_SBAHN = Pattern.compile("S\\d*");
private static final Pattern P_NORMALIZE_TYPE_SBAHN = Pattern.compile("SN?\\d*");
private static final Pattern P_NORMALIZE_TYPE_BUS = Pattern.compile("BUS\\w*");
private static char normalizeType(final String type)