mirror of
https://gitlab.com/oeffi/public-transport-enabler.git
synced 2025-07-16 09:29:49 +00:00
Split departures head parsing into coarse and fine stages
git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@76 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
parent
8b4b691aeb
commit
5b1f74082c
1 changed file with 59 additions and 51 deletions
|
@ -414,13 +414,18 @@ public final class BahnProvider implements NetworkProvider
|
|||
return uri.toString();
|
||||
}
|
||||
|
||||
private static final Pattern P_DEPARTURES_HEAD = Pattern.compile(".*<div class=\"haupt rline\">.*?"
|
||||
+ "<span class=\"bold\">\\n?(.+?)\\s*(?:- Aktuell)?\\n</span>.*?" //
|
||||
+ "Abfahrt (\\d+:\\d+)\\n?Uhr, (\\d+\\.\\d+\\.\\d+)\\n?" //
|
||||
+ "</div>.*", Pattern.DOTALL);
|
||||
private static final Pattern P_DEPARTURES_HEAD_COARSE = Pattern.compile(
|
||||
".*?<title>Deutsche Bahn - Abfahrt</title>.*?<body >(.*?Abfahrt.*?)</body>.*?", Pattern.DOTALL);
|
||||
private static final Pattern P_DEPARTURES_HEAD_FINE = Pattern.compile(".*?" //
|
||||
+ "<div class=\"haupt rline\">\n?<span class=\"bold\">\\n?(.+?)\\s*(?:- Aktuell)?\\n</span>.*?" // location
|
||||
+ "Abfahrt (\\d+:\\d+)\\n?Uhr, (\\d+\\.\\d+\\.\\d+).*?" // currentTime
|
||||
, Pattern.DOTALL);
|
||||
private static final Pattern P_DEPARTURES_COARSE = Pattern.compile("<div class=\"sqdetailsDep trow\">(.+?)</div>", Pattern.DOTALL);
|
||||
private static final Pattern P_DEPARTURES_FINE = Pattern.compile(".*?<span class=\"bold\">(.*?)</span>.*?"
|
||||
+ ">>\\n?\\s*(.+?)\\s*\\n?<br />\\n?<span class=\"bold\">(\\d+:\\d+)</span>.*?", Pattern.DOTALL);
|
||||
private static final Pattern P_DEPARTURES_FINE = Pattern.compile(".*?" //
|
||||
+ "<span class=\"bold\">(.*?)</span>.*?" // line
|
||||
+ ">>\\n?\\s*(.+?)\\s*\\n?<br />\\n?" // destination
|
||||
+ "<span class=\"bold\">(\\d+:\\d+)</span>.*?" // time
|
||||
, Pattern.DOTALL);
|
||||
private static final Pattern P_DEPARTURES_URI_STATION_ID = Pattern.compile("input=(\\d+)");
|
||||
|
||||
public QueryDeparturesResult queryDepartures(final String uri) throws IOException
|
||||
|
@ -432,32 +437,57 @@ public final class BahnProvider implements NetworkProvider
|
|||
throw new IllegalStateException(uri);
|
||||
final int stationId = Integer.parseInt(mStationId.group(1));
|
||||
|
||||
// parse page
|
||||
final Matcher mHead = P_DEPARTURES_HEAD.matcher(page);
|
||||
if (mHead.matches())
|
||||
final Matcher mHeadCoarse = P_DEPARTURES_HEAD_COARSE.matcher(page);
|
||||
if (mHeadCoarse.matches())
|
||||
{
|
||||
final String location = ParserUtils.resolveEntities(mHead.group(1));
|
||||
final Date currentTime = ParserUtils.joinDateTime(ParserUtils.parseDate(mHead.group(3)), ParserUtils.parseTime(mHead.group(2)));
|
||||
final List<Departure> departures = new ArrayList<Departure>(8);
|
||||
|
||||
// choose matcher
|
||||
final Matcher mDepCoarse = P_DEPARTURES_COARSE.matcher(page);
|
||||
while (mDepCoarse.find())
|
||||
final Matcher mHeadFine = P_DEPARTURES_HEAD_FINE.matcher(mHeadCoarse.group(1));
|
||||
if (mHeadFine.matches())
|
||||
{
|
||||
final Matcher mDepFine = P_DEPARTURES_FINE.matcher(mDepCoarse.group(1));
|
||||
if (mDepFine.matches())
|
||||
{
|
||||
final Departure dep = parseDeparture(mDepFine, currentTime);
|
||||
if (!departures.contains(dep))
|
||||
departures.add(dep);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("cannot parse '" + mDepCoarse.group(1) + "' on " + uri);
|
||||
}
|
||||
}
|
||||
final String location = ParserUtils.resolveEntities(mHeadFine.group(1));
|
||||
final Date currentTime = ParserUtils.joinDateTime(ParserUtils.parseDate(mHeadFine.group(3)), ParserUtils
|
||||
.parseTime(mHeadFine.group(2)));
|
||||
final List<Departure> departures = new ArrayList<Departure>(8);
|
||||
|
||||
return new QueryDeparturesResult(uri, stationId, location, currentTime, departures);
|
||||
// choose matcher
|
||||
final Matcher mDepCoarse = P_DEPARTURES_COARSE.matcher(page);
|
||||
while (mDepCoarse.find())
|
||||
{
|
||||
final Matcher mDepFine = P_DEPARTURES_FINE.matcher(mDepCoarse.group(1));
|
||||
if (mDepFine.matches())
|
||||
{
|
||||
// line
|
||||
final String line = normalizeLine(ParserUtils.resolveEntities(mDepFine.group(1)));
|
||||
|
||||
// destination
|
||||
final String destination = ParserUtils.resolveEntities(mDepFine.group(2));
|
||||
|
||||
// time
|
||||
final Calendar current = new GregorianCalendar();
|
||||
current.setTime(currentTime);
|
||||
final Calendar parsed = new GregorianCalendar();
|
||||
parsed.setTime(ParserUtils.parseTime(mDepFine.group(3)));
|
||||
parsed.set(Calendar.YEAR, current.get(Calendar.YEAR));
|
||||
parsed.set(Calendar.MONTH, current.get(Calendar.MONTH));
|
||||
parsed.set(Calendar.DAY_OF_MONTH, current.get(Calendar.DAY_OF_MONTH));
|
||||
if (ParserUtils.timeDiff(parsed.getTime(), currentTime) < -PARSER_DAY_ROLLOVER_THRESHOLD_MS)
|
||||
parsed.add(Calendar.DAY_OF_MONTH, 1);
|
||||
|
||||
final Departure dep = new Departure(parsed.getTime(), line, line != null ? LINES.get(line.charAt(0)) : null, 0, destination);
|
||||
if (!departures.contains(dep))
|
||||
departures.add(dep);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("cannot parse '" + mDepCoarse.group(1) + "' on " + uri);
|
||||
}
|
||||
}
|
||||
|
||||
return new QueryDeparturesResult(uri, stationId, location, currentTime, departures);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new IllegalArgumentException("cannot parse '" + mHeadCoarse.group(1) + "' on " + uri);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -465,28 +495,6 @@ public final class BahnProvider implements NetworkProvider
|
|||
}
|
||||
}
|
||||
|
||||
private static Departure parseDeparture(final Matcher mDep, final Date currentTime)
|
||||
{
|
||||
// line
|
||||
final String line = normalizeLine(ParserUtils.resolveEntities(mDep.group(1)));
|
||||
|
||||
// destination
|
||||
final String destination = ParserUtils.resolveEntities(mDep.group(2));
|
||||
|
||||
// time
|
||||
final Calendar current = new GregorianCalendar();
|
||||
current.setTime(currentTime);
|
||||
final Calendar parsed = new GregorianCalendar();
|
||||
parsed.setTime(ParserUtils.parseTime(mDep.group(3)));
|
||||
parsed.set(Calendar.YEAR, current.get(Calendar.YEAR));
|
||||
parsed.set(Calendar.MONTH, current.get(Calendar.MONTH));
|
||||
parsed.set(Calendar.DAY_OF_MONTH, current.get(Calendar.DAY_OF_MONTH));
|
||||
if (ParserUtils.timeDiff(parsed.getTime(), currentTime) < -PARSER_DAY_ROLLOVER_THRESHOLD_MS)
|
||||
parsed.add(Calendar.DAY_OF_MONTH, 1);
|
||||
|
||||
return new Departure(parsed.getTime(), line, line != null ? LINES.get(line.charAt(0)) : null, 0, destination);
|
||||
}
|
||||
|
||||
private static final Pattern P_NORMALIZE_LINE_NUMBER = Pattern.compile("\\d{2,5}");
|
||||
private static final Pattern P_NORMALIZE_LINE = Pattern.compile("([A-Za-zÄÖÜäöüß]+)[\\s-]*(.*)");
|
||||
private static final Pattern P_NORMALIZE_LINE_RUSSIA = Pattern.compile("(?:D\\s*)?(\\d{1,3}[A-Z]{2})");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue