mirror of
https://gitlab.com/oeffi/public-transport-enabler.git
synced 2025-07-15 17:10:30 +00:00
split head parsing into coarse and fine
git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@96 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
parent
75a7692e7f
commit
4b1f0bb5e7
1 changed files with 76 additions and 62 deletions
|
@ -359,44 +359,75 @@ public class SbbProvider implements NetworkProvider
|
||||||
return uri.toString();
|
return uri.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Pattern P_DEPARTURES_HEAD = Pattern.compile(".*?<p class=\"qs\">\n?" //
|
private static final Pattern P_DEPARTURES_HEAD_COARSE = Pattern.compile(".*?" //
|
||||||
+ "<b>(.*?)</b><br />\n?"//
|
+ "(?:" //
|
||||||
+ "Abfahrt (\\d+:\\d+)\n?"//
|
+ "<p class=\"qs\">\n(.+?)\n</p>.*?" //
|
||||||
+ "Uhr, (\\d+\\.\\d+\\.\\d+)\n?"//
|
+ "(?:(.+)|(an dieser Haltestelle keines)).*?" //
|
||||||
+ "</p>.*", Pattern.DOTALL);
|
+ "<p class=\"links\">\n(.+?)\n</p>" //
|
||||||
private static final Pattern P_DEPARTURES_COARSE = Pattern.compile("<p class=\"sq\">(.+?)</p>", Pattern.DOTALL);
|
+ "|(Informationen zu))" //
|
||||||
private static final Pattern P_DEPARTURES_FINE = Pattern.compile(".*?<b>(.*?)</b>\n?" //
|
+ ".*?" //
|
||||||
+ ">>\n?" //
|
, Pattern.DOTALL);
|
||||||
+ "(.*?)\n?" //
|
private static final Pattern P_DEPARTURES_HEAD_FINE = Pattern.compile("" //
|
||||||
+ "<br />\n?" //
|
+ "<b>(.*?)</b><br />\n" // location
|
||||||
+ "<b>(\\d+:\\d+)</b>.*", Pattern.DOTALL);
|
+ "Abfahrt (\\d+:\\d+)\n" // time
|
||||||
private static final Pattern P_DEPARTURES_URI_STATION_ID = Pattern.compile("input=(\\d+)");
|
+ "Uhr, (\\d{2}\\.\\d{2}\\.\\d{2}).*?" // date
|
||||||
|
+ "input=(\\d+).*?" // locationId
|
||||||
|
, Pattern.DOTALL);
|
||||||
|
private static final Pattern P_DEPARTURES_COARSE = Pattern.compile("<p class=\"sq\">\n(.+?)\n</p>", Pattern.DOTALL);
|
||||||
|
private static final Pattern P_DEPARTURES_FINE = Pattern.compile("" //
|
||||||
|
+ "<b>(.*?)</b>\n" // line
|
||||||
|
+ ">>\n" //
|
||||||
|
+ "(.*?)\n" // destination
|
||||||
|
+ "<br />\n" //
|
||||||
|
+ "<b>(\\d+:\\d+)</b>.*?" // time
|
||||||
|
, Pattern.DOTALL);
|
||||||
|
|
||||||
public QueryDeparturesResult queryDepartures(final String uri) throws IOException
|
public QueryDeparturesResult queryDepartures(final String uri) throws IOException
|
||||||
{
|
{
|
||||||
final CharSequence page = ParserUtils.scrape(uri);
|
final CharSequence page = ParserUtils.scrape(uri);
|
||||||
|
|
||||||
final Matcher mStationId = P_DEPARTURES_URI_STATION_ID.matcher(uri);
|
|
||||||
if (!mStationId.find())
|
|
||||||
throw new IllegalStateException(uri);
|
|
||||||
final int stationId = Integer.parseInt(mStationId.group(1));
|
|
||||||
|
|
||||||
// parse page
|
// parse page
|
||||||
final Matcher mHead = P_DEPARTURES_HEAD.matcher(page);
|
final Matcher mHeadCoarse = P_DEPARTURES_HEAD_COARSE.matcher(page);
|
||||||
if (mHead.matches())
|
if (mHeadCoarse.matches())
|
||||||
{
|
{
|
||||||
final String location = ParserUtils.resolveEntities(mHead.group(1));
|
// messages
|
||||||
final Date currentTime = ParserUtils.joinDateTime(ParserUtils.parseDate(mHead.group(3)), ParserUtils.parseTime(mHead.group(2)));
|
if (mHeadCoarse.group(3) != null)
|
||||||
|
return new QueryDeparturesResult(uri, Status.NO_INFO);
|
||||||
|
else if (mHeadCoarse.group(5) != null)
|
||||||
|
return new QueryDeparturesResult(uri, Status.INVALID_STATION);
|
||||||
|
|
||||||
|
final String c = mHeadCoarse.group(1) + mHeadCoarse.group(4);
|
||||||
|
final Matcher mHeadFine = P_DEPARTURES_HEAD_FINE.matcher(c);
|
||||||
|
if (mHeadFine.matches())
|
||||||
|
{
|
||||||
|
final String location = ParserUtils.resolveEntities(mHeadFine.group(1));
|
||||||
|
final Date currentTime = ParserUtils.joinDateTime(ParserUtils.parseDate(mHeadFine.group(3)), ParserUtils
|
||||||
|
.parseTime(mHeadFine.group(2)));
|
||||||
|
final int locationId = Integer.parseInt(mHeadFine.group(4));
|
||||||
final List<Departure> departures = new ArrayList<Departure>(8);
|
final List<Departure> departures = new ArrayList<Departure>(8);
|
||||||
|
|
||||||
// choose matcher
|
final Matcher mDepCoarse = P_DEPARTURES_COARSE.matcher(mHeadCoarse.group(2));
|
||||||
final Matcher mDepCoarse = P_DEPARTURES_COARSE.matcher(page);
|
|
||||||
while (mDepCoarse.find())
|
while (mDepCoarse.find())
|
||||||
{
|
{
|
||||||
final Matcher mDepFine = P_DEPARTURES_FINE.matcher(mDepCoarse.group(1));
|
final Matcher mDepFine = P_DEPARTURES_FINE.matcher(mDepCoarse.group(1));
|
||||||
if (mDepFine.matches())
|
if (mDepFine.matches())
|
||||||
{
|
{
|
||||||
final Departure dep = parseDeparture(mDepFine, currentTime);
|
final String line = normalizeLine(ParserUtils.resolveEntities(mDepFine.group(1)));
|
||||||
|
|
||||||
|
final String destination = ParserUtils.resolveEntities(mDepFine.group(2));
|
||||||
|
|
||||||
|
final Calendar current = new GregorianCalendar();
|
||||||
|
current.setTime(currentTime);
|
||||||
|
final Calendar parsed = new GregorianCalendar();
|
||||||
|
parsed.setTime(ParserUtils.parseTime(mDepFine.group(3)));
|
||||||
|
parsed.set(Calendar.YEAR, current.get(Calendar.YEAR));
|
||||||
|
parsed.set(Calendar.MONTH, current.get(Calendar.MONTH));
|
||||||
|
parsed.set(Calendar.DAY_OF_MONTH, current.get(Calendar.DAY_OF_MONTH));
|
||||||
|
if (ParserUtils.timeDiff(parsed.getTime(), currentTime) < -PARSER_DAY_ROLLOVER_THRESHOLD_MS)
|
||||||
|
parsed.add(Calendar.DAY_OF_MONTH, 1);
|
||||||
|
|
||||||
|
final Departure dep = new Departure(parsed.getTime(), line, line != null ? LINES.get(line.charAt(0)) : null, 0, destination);
|
||||||
|
|
||||||
if (!departures.contains(dep))
|
if (!departures.contains(dep))
|
||||||
departures.add(dep);
|
departures.add(dep);
|
||||||
}
|
}
|
||||||
|
@ -406,34 +437,17 @@ public class SbbProvider implements NetworkProvider
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return new QueryDeparturesResult(uri, stationId, location, currentTime, departures);
|
return new QueryDeparturesResult(uri, locationId, location, currentTime, departures);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return new QueryDeparturesResult(uri, Status.NO_INFO);
|
throw new IllegalArgumentException("cannot parse '" + c + "' on " + uri);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
private static Departure parseDeparture(final Matcher mDep, final Date currentTime)
|
|
||||||
{
|
{
|
||||||
// line
|
throw new IllegalArgumentException("cannot parse '" + page + "' on " + uri);
|
||||||
final String line = normalizeLine(ParserUtils.resolveEntities(mDep.group(1)));
|
}
|
||||||
|
|
||||||
// destination
|
|
||||||
final String destination = ParserUtils.resolveEntities(mDep.group(2));
|
|
||||||
|
|
||||||
// time
|
|
||||||
final Calendar current = new GregorianCalendar();
|
|
||||||
current.setTime(currentTime);
|
|
||||||
final Calendar parsed = new GregorianCalendar();
|
|
||||||
parsed.setTime(ParserUtils.parseTime(mDep.group(3)));
|
|
||||||
parsed.set(Calendar.YEAR, current.get(Calendar.YEAR));
|
|
||||||
parsed.set(Calendar.MONTH, current.get(Calendar.MONTH));
|
|
||||||
parsed.set(Calendar.DAY_OF_MONTH, current.get(Calendar.DAY_OF_MONTH));
|
|
||||||
if (ParserUtils.timeDiff(parsed.getTime(), currentTime) < -PARSER_DAY_ROLLOVER_THRESHOLD_MS)
|
|
||||||
parsed.add(Calendar.DAY_OF_MONTH, 1);
|
|
||||||
|
|
||||||
return new Departure(parsed.getTime(), line, line != null ? LINES.get(line.charAt(0)) : null, 0, destination);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Pattern P_NORMALIZE_LINE = Pattern.compile("([A-Za-zÄÖÜäöüß]+)[\\s-]*(.*)");
|
private static final Pattern P_NORMALIZE_LINE = Pattern.compile("([A-Za-zÄÖÜäöüß]+)[\\s-]*(.*)");
|
||||||
|
@ -471,7 +485,7 @@ public class SbbProvider implements NetworkProvider
|
||||||
throw new IllegalStateException("cannot normalize line " + line);
|
throw new IllegalStateException("cannot normalize line " + line);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final Pattern P_NORMALIZE_TYPE_SBAHN = Pattern.compile("S\\d*");
|
private static final Pattern P_NORMALIZE_TYPE_SBAHN = Pattern.compile("SN?\\d*");
|
||||||
private static final Pattern P_NORMALIZE_TYPE_BUS = Pattern.compile("BUS\\w*");
|
private static final Pattern P_NORMALIZE_TYPE_BUS = Pattern.compile("BUS\\w*");
|
||||||
|
|
||||||
private static char normalizeType(final String type)
|
private static char normalizeType(final String type)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue