parse links more cautiously

git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@115 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
andreas.schildbach 2010-08-29 09:50:25 +00:00
parent e28029c8df
commit 53d0d688fe
4 changed files with 30 additions and 19 deletions

View file

@ -225,11 +225,12 @@ public final class BahnProvider implements NetworkProvider
+ "von: <span class=\"bold\">(.*?)</span>.*?" // from
+ "nach: <span class=\"bold\">(.*?)</span>.*?" // to
+ "Datum: <span class=\"bold\">.., (.*?)</span>.*?" // currentDate
+ "(?:<a href=\"(http://mobile.bahn.de/bin/mobil/query.exe/dox.*?)\">.*?Fr&#252;her.*?)?" // linkEarlier
+ "(?:<a class=\"noBG\" href=\"(http://mobile.bahn.de/bin/mobil/query.exe/dox.*?)\">.*?Sp&#228;ter.*?)?" // linkLater
+ "(?:<a href=\"(http://mobile.bahn.de/bin/mobil/query.exe/dox[^\"]*?)\">.*?Fr&#252;her.*?)?" // linkEarlier
+ "(?:<a class=\"noBG\" href=\"(http://mobile.bahn.de/bin/mobil/query.exe/dox[^\"]*?)\">.*?Sp&#228;ter.*?)?" // linkLater
, Pattern.DOTALL);
// Matches a table row that opens with <td class="overview timelink">.
// Group 1 is the reluctantly matched content up to the first "</td></tr>";
// DOTALL lets that content span line breaks.
// NOTE(review): presumably each match is one connection row that is parsed in
// detail by a second pattern -- confirm against the caller.
private static final Pattern P_CONNECTIONS_COARSE = Pattern.compile("<tr><td class=\"overview timelink\">(.+?)</td></tr>", Pattern.DOTALL);
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?<a href=\"(http://mobile.bahn.de/bin/mobil/query.exe/dox.*?)\">" // url
// Detail pattern for a single connection entry. Capture groups:
//   1 = href of the connection link; the [^"]*? tail keeps the capture inside
//       the attribute value so it cannot run past the closing quote
//   2 = first hh:mm-style time (departure), 3 = second time (arrival)
//   4 = content of the "overview iphonepfeil" cell up to the first <br /> (line)
// NOTE(review): the dots in "mobile.bahn.de" and "query.exe" are unescaped and
// therefore match any character, not only a literal '.'.
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?" //
+ "<a href=\"(http://mobile.bahn.de/bin/mobil/query.exe/dox[^\"]*?)\">" // link
+ "(\\d+:\\d+)<br />(\\d+:\\d+)</a></td>.+?" // departureTime, arrivalTime
+ "<td class=\"overview iphonepfeil\">(.*?)<br />.*?" // line
, Pattern.DOTALL);

View file

@ -222,16 +222,19 @@ public class RmvProvider implements NetworkProvider
return queryConnections(uri, page);
}
// Header pattern with a greedy ".*" prefix. Capture groups:
//   1 = text in <b> after "Von:", 2 = text in <b> after "Nach:",
//   3 = date after "Datum: .., ", 4 = optional link with REQ0HafasScrollDir=2,
//   5 = optional link with REQ0HafasScrollDir=1.
// The link captures use ".*?" under DOTALL, so they are not confined to the href
// attribute value and may cross quotes or tags until the ScrollDir marker is found.
private static final Pattern P_CONNECTIONS_HEAD = Pattern.compile(".*" //
+ "Von: <b>(.*?)</b>.*?" // from
+ "Nach: <b>(.*?)</b>.*?" // to
+ "Datum: .., (\\d+\\..\\d+\\.\\d+).*?" // date
+ "(?:<a href=\"(http://www.rmv.de/auskunft/bin/jp/query.exe/dox.*?REQ0HafasScrollDir=2)\">Fr&#252;her.*?)?" // earlier link
+ "(?:<a href=\"(http://www.rmv.de/auskunft/bin/jp/query.exe/dox.*?REQ0HafasScrollDir=1)\">Sp&#228;ter.*?)?", Pattern.DOTALL);
// Header pattern with a reluctant ".*?" prefix. Capture groups:
//   1 = from, 2 = to, 3 = currentDate,
//   4 = optional "earlier" link (ends in REQ0HafasScrollDir=2),
//   5 = optional "later" link (ends in REQ0HafasScrollDir=1).
// Each link capture is limited to [^"]*? so it stays inside the href attribute
// value; only the closing quote is required after it, not a specific link text.
// NOTE(review): "Datum: .., " skips two arbitrary characters, presumably a
// weekday abbreviation -- confirm against a real page.
// NOTE(review): in the date group, "\\.." is an escaped dot followed by an
// any-character dot, so the middle number effectively needs at least two characters.
private static final Pattern P_CONNECTIONS_HEAD = Pattern.compile(".*?" //
+ "Von: <b>(.*?)</b>.*?" // from
+ "Nach: <b>(.*?)</b>.*?" // to
+ "Datum: .., (\\d+\\..\\d+\\.\\d+).*?" // currentDate
+ "(?:<a href=\"(http://www.rmv.de/auskunft/bin/jp/query.exe/dox[^\"]*?REQ0HafasScrollDir=2)\".*?)?" // linkEarlier
+ "(?:<a href=\"(http://www.rmv.de/auskunft/bin/jp/query.exe/dox[^\"]*?REQ0HafasScrollDir=1)\".*?)?" // linkLater
, Pattern.DOTALL);
// Matches a <p> element whose class is "conL" or "conD". Group 1 is the
// reluctantly matched content up to the first "</p>"; DOTALL lets it span line breaks.
private static final Pattern P_CONNECTIONS_COARSE = Pattern.compile("<p class=\"con(?:L|D)\">(.+?)</p>", Pattern.DOTALL);
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?<a href=\"(http://www.rmv.de/auskunft/bin/jp/query.exe/dox.*?)\">" // url
// Detail pattern for a single connection entry. Capture groups:
//   1 = href of the connection link, limited to [^"]*? so it stays inside the
//       attribute value
//   2 and 3 = the two hh:mm-style times separated by '-'
//   4 = optional text following "&nbsp;" (unset when that part is absent)
// NOTE(review): the optional "&nbsp;" tail appears twice below, once terminated by
// ", Pattern.DOTALL);" and once followed by a separate ", Pattern.DOTALL);" line.
// Only one of these endings can belong to the statement -- confirm which is intended.
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?" //
+ "<a href=\"(http://www.rmv.de/auskunft/bin/jp/query.exe/dox[^\"]*?)\">" // link
+ "(\\d+:\\d+)-(\\d+:\\d+)</a>" //
+ "(?:&nbsp;(.+?))?", Pattern.DOTALL);
+ "(?:&nbsp;(.+?))?" //
, Pattern.DOTALL);
private QueryConnectionsResult queryConnections(final String uri, final CharSequence page) throws IOException
{

View file

@ -184,8 +184,8 @@ public class SbbProvider implements NetworkProvider
+ "Von:.*?<td .*?>(.*?)</td>.*?" // from
+ "Datum:.*?<td .*?>.., (\\d{2}\\.\\d{2}\\.\\d{2})</td>.*?" // date
+ "Nach:.*?<td .*?>(.*?)</td>.*?" // to
+ "(?:<a href=\"(http://fahrplan.sbb.ch/bin/query.exe/dn\\?seqnr=\\d+&ident=[\\w\\.]+&REQ0HafasScrollDir=2)\".*?>.*?)?" // linkEarlier
+ "(?:<a href=\"(http://fahrplan.sbb.ch/bin/query.exe/dn\\?seqnr=\\d+&ident=[\\w\\.]+&REQ0HafasScrollDir=1)\".*?>.*?)?" // linkLater
+ "(?:<a href=\"(http://fahrplan.sbb.ch/bin/query.exe/dn[^\"]*?&REQ0HafasScrollDir=2)\".*?)?" // linkEarlier
+ "(?:<a href=\"(http://fahrplan.sbb.ch/bin/query.exe/dn[^\"]*?&REQ0HafasScrollDir=1)\".*?)?" // linkLater
, Pattern.DOTALL);
// Matches two consecutive <tr> rows that carry the same "zebra-row-N" class:
// the second row's class must equal group 1 via the \1 back-reference, and the
// rows may be separated by at most one newline character.
// Group 1 = the shared class name, group 2 = content of the first row,
// group 3 = content of the second row. DOTALL lets row content span line breaks.
private static final Pattern P_CONNECTIONS_COARSE = Pattern.compile("<tr class=\"(zebra-row-\\d)\">(.*?)</tr>\n?"//
+ "<tr class=\"\\1\">(.+?)</tr>", Pattern.DOTALL);

View file

@ -272,13 +272,18 @@ public final class VbbProvider implements NetworkProvider
return queryConnections(uri, page);
}
// Header pattern with a greedy ".*" prefix. Capture groups:
//   1 = text in <strong> after "Von:", 2 = text in <strong> after "Nach:",
//   3 = text after "Datum: .., " up to the first <br />,
//   4 = optional relative link ending in ScrollDir=2,
//   5 = optional relative link ending in ScrollDir=1.
// The link captures allow 1 to 80 arbitrary characters (any character, under
// DOTALL) between "dox" and the ScrollDir marker.
private static final Pattern P_CONNECTIONS_HEAD = Pattern.compile(
".*Von: <strong>(.*?)</strong>.*?Nach: <strong>(.*?)</strong>.*?Datum: .., (.*?)<br />.*?"
+ "(?:<a href=\"(/Fahrinfo/bin/query\\.bin/dox.{1,80}ScrollDir=2)\">.*?)?"
+ "(?:<a href=\"(/Fahrinfo/bin/query\\.bin/dox.{1,80}ScrollDir=1)\">.*?)?", Pattern.DOTALL);
// Header pattern with a reluctant ".*?" prefix. Capture groups:
//   1 = from, 2 = to, 3 = currentDate (text up to the first <br />),
//   4 = optional "earlier" link (relative URL ending in ScrollDir=2),
//   5 = optional "later" link (relative URL ending in ScrollDir=1).
// Each link capture is limited to [^"]*? so it stays inside the href attribute
// value, and the attribute must be closed by "\">" immediately after the marker.
// NOTE(review): "Datum: .., " skips two arbitrary characters, presumably a
// weekday abbreviation -- confirm against a real page.
private static final Pattern P_CONNECTIONS_HEAD = Pattern.compile(".*?" //
+ "Von: <strong>(.*?)</strong>.*?" // from
+ "Nach: <strong>(.*?)</strong>.*?" // to
+ "Datum: .., (.*?)<br />.*?" // currentDate
+ "(?:<a href=\"(/Fahrinfo/bin/query\\.bin/dox[^\"]*?ScrollDir=2)\">.*?)?" // linkEarlier
+ "(?:<a href=\"(/Fahrinfo/bin/query\\.bin/dox[^\"]*?ScrollDir=1)\">.*?)?" // linkLater
, Pattern.DOTALL);
// Matches a <p> element whose class is "conL" or "conD". Group 1 is the
// reluctantly matched content up to the first "</p>"; DOTALL lets it span line breaks.
private static final Pattern P_CONNECTIONS_COARSE = Pattern.compile("<p class=\"con(?:L|D)\">(.+?)</p>", Pattern.DOTALL);
// Detail pattern for a single connection entry. Capture groups:
//   1 = href of the connection link; ".*?" under DOTALL means the capture is not
//       confined to the attribute value and ends at the first "\">" that is
//       followed by the time pair
//   2 and 3 = the two dd:dd times separated by '-'
//   4 = run of word characters and spaces after the two "&nbsp;"; unset when the
//       non-capturing "<digits> Umst." alternative matches instead
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?<a href=\"(/Fahrinfo/bin/query\\.bin/dox.*?)\">"
+ "(\\d\\d:\\d\\d)-(\\d\\d:\\d\\d)</a>&nbsp;&nbsp;(?:\\d+ Umst\\.|([\\w\\d ]+)).*?", Pattern.DOTALL);
// Detail pattern for a single connection entry. Capture groups:
//   1 = href of the connection link, limited to [^"]*? so it stays inside the
//       attribute value
//   2 and 3 = the two dd:dd times separated by '-'
//   4 = run of word characters and spaces after the two "&nbsp;"; unset when the
//       non-capturing "<digits> Umst." alternative matches instead
// Note: \d inside [\w\d ] is redundant because \w already includes digits.
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?" //
+ "<a href=\"(/Fahrinfo/bin/query\\.bin/dox[^\"]*?)\">" // link
+ "(\\d\\d:\\d\\d)-(\\d\\d:\\d\\d)</a>&nbsp;&nbsp;(?:\\d+ Umst\\.|([\\w\\d ]+)).*?" //
, Pattern.DOTALL);
private QueryConnectionsResult queryConnections(final String uri, final CharSequence page) throws IOException
{
@ -602,6 +607,8 @@ public final class VbbProvider implements NetworkProvider
if (line.startsWith("RE") || line.startsWith("RB") || line.startsWith("NE") || line.startsWith("OE") || line.startsWith("MR")
|| line.startsWith("PE"))
return "R" + line;
if (line.equals("11"))
return "?11";
if (P_NORMALIZE_LINE_SPECIAL_NUMBER.matcher(line).matches())
return "R" + line;