parse trip without product

git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@177 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
andreas.schildbach 2010-09-22 01:43:26 +00:00
parent 1d8ae79785
commit edab600f71
2 changed files with 62 additions and 25 deletions

View file

@ -470,18 +470,19 @@ public class MvvProvider implements NetworkProvider
private static final Pattern P_CONNECTION_DETAILS_HEAD = Pattern.compile(".*<b>Detailansicht</b>.*?" //
+ "<b>Datum:[\\xa0\\s]+</b>\\w{2}\\.,\\s(\\d+)\\.\\s(\\w{3,4})\\.[\\xa0\\s]+(\\d{4}).*", Pattern.DOTALL);
private static final Pattern P_CONNECTION_DETAILS_COARSE = Pattern.compile("<tr bgcolor=\"#(\\w{6})\">(.+?)</tr>.*?"
+ "<tr bgcolor=\"#\\1\">(.+?)</tr>.*?" //
+ "<tr bgcolor=\"#\\1\">(.+?)</tr>", Pattern.DOTALL);
static final Pattern P_CONNECTION_DETAILS_FINE = Pattern.compile(".*?(?:" //
+ "ab (\\d+:\\d+)\\s+(.*?)\\s*<.*?" //
+ "<img src=\"images/means.*?\" alt=\"(.*?)\" />.*?" //
private static final Pattern P_CONNECTION_DETAILS_COARSE = Pattern.compile("" //
+ "<tr bgcolor=\"#(\\w{6})\">\r\\x0a(.+?)</tr>.*?" //
+ "<tr bgcolor=\"#\\1\">\r\\x0a(.+?)</tr>.*?" //
+ "<tr bgcolor=\"#\\1\">\r\\x0a(.+?)</tr>", Pattern.DOTALL);
static final Pattern P_CONNECTION_DETAILS_FINE = Pattern.compile("(?:" //
+ "<td colspan=\"\\d+\">ab (\\d{1,2}:\\d{2})\\s(.*?)\\s*<.*?" // departureTime, departure
+ "(?:<img src=\"images/means.*?\" alt=\"(.*?)\" />.*?)?" // product
+ "<td>\\s*(.*?)\\s*<br />Richtung\\s*(.*?)\\s*</td>.*?" //
+ "an (\\d+:\\d+)\\s+(.*?)\\s*<" //
+ "<td colspan=\"\\d+\">an (\\d{1,2}:\\d{2})\\s(.*?)\\s*<" //
+ "|" //
+ "ab\\s+(.*?)\\s*<.*?" //
+ "<td colspan=\"\\d+\">ab (.*?)\\s*<.*?" // departure
+ "Fußweg[\\xa0\\s]+\\(ca\\.[\\xa0\\s]+(\\d+)[\\xa0\\s]+Minute.*?" //
+ "an\\s+(.*?)\\s*<" //
+ "<td colspan=\"\\d+\">an (.*?)\\s*<" //
+ ").*?", Pattern.DOTALL);
private static final Pattern P_CONNECTION_DETAILS_ERRORS = Pattern.compile("(session has expired)", Pattern.CASE_INSENSITIVE);
private static final String SITZENBLEIBER = "Sitzenbleiber";
@ -502,10 +503,17 @@ public class MvvProvider implements NetworkProvider
String firstDeparture = null;
Date lastArrivalTime = null;
String lastArrival = null;
String oldZebra = null;
final Matcher mDetCoarse = P_CONNECTION_DETAILS_COARSE.matcher(page);
while (mDetCoarse.find())
{
final String zebra = mDetCoarse.group(1);
if (oldZebra != null && zebra.equals(oldZebra))
throw new IllegalArgumentException("missed row? last:" + zebra);
else
oldZebra = zebra;
final String set = mDetCoarse.group(2) + mDetCoarse.group(3) + mDetCoarse.group(4);
if (!set.contains(SITZENBLEIBER))
{

View file

@ -17,6 +17,8 @@
package de.schildbach.pte;
import static junit.framework.Assert.assertNull;
import static junit.framework.Assert.assertNotNull;
import static junit.framework.Assert.assertTrue;
import java.util.regex.Matcher;
@ -32,66 +34,89 @@ public class MvvProviderTest
@Test
public void trip()
{
assertFineConnectionDetails("\n" //
final Matcher m = assertFineConnectionDetails("" //
+ "<td colspan=\"4\">ab 04:27 Machern (Sachs) Gleis 2<br />\n" //
+ "</td>\n" //
+ "\n" //
+ "<td width=\"15\" valign=\"middle\">\n" //
+ "<img src=\"images/means/zug.gif\" alt=\"Zug\" />\n" //
+ "</td>\n" //
+ "<td width=\"1\" valign=\"middle\" />\n" //
+ "<td>MRB 88040 Mitteldeutsche Regiobahn <br />Richtung Leipzig Hbf</td>\n" //
+ "<td width=\"1\"> </td>\n" //
+ "\n" //
+ "<td colspan=\"4\">an 04:47 Leipzig Hbf Gleis 19</td>\n");
assertNotNull(m.group(1)); // departureTime
assertNotNull(m.group(2)); // departure
assertNotNull(m.group(3)); // product
}
@Test
public void trip2()
{
assertFineConnectionDetails("\n" //
final Matcher m = assertFineConnectionDetails("" //
+ "<td colspan=\"4\">ab 09:04 Hauptbahnhof Haupthalle Gleis 26 <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector2=on&amp;sessionID=MVV2_1641919219&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_2&amp;itdLPxx_img=FILELOAD?Filename=mvv2_4C4530855.png&amp;itdLPxx_partialRoute=3&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;itdLPxx_usage=departure\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "<br />\n" //
+ "</td>\n" //
+ "\n" //
+ "<td width=\"15\" valign=\"middle\">\n" //
+ "<img src=\"images/means/zug.gif\" alt=\"Zug\" />\n" //
+ "</td>\n" //
+ "<td width=\"1\" valign=\"middle\" />\n" //
+ "<td>RE 4006 RegionalExpress <br />Richtung Nürnberg Hbf</td>\n" //
+ "<td width=\"1\"> </td>\n" //
+ "\n" //
+ "<td colspan=\"4\">an 10:47 Nürnberg Hbf Gleis 12</td>\n");
assertNotNull(m.group(1)); // departureTime
assertNotNull(m.group(2)); // departure
assertNotNull(m.group(3)); // product
}
@Test
@Ignore("deactivated because there is no time")
public void tripWithoutTime()
{
assertFineConnectionDetails("\n" //
final Matcher m = assertFineConnectionDetails("" //
+ "<td colspan=\"4\">ab Neufahrn <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector2=on&amp;sessionID=MVV2_1678243657&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_2&amp;itdLPxx_img=FILELOAD?Filename=mvv2_4C45BE6910.png&amp;itdLPxx_partialRoute=2&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;itdLPxx_usage=departure\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "<br />\n" //
+ "</td>\n" //
+ "\n" //
+ "<td width=\"15\" valign=\"middle\">\n" //
+ "<img src=\"images/means/seat.gif\" alt=\"Sitzenbleiber\" />\n" //
+ "</td>\n" //
+ "<td width=\"1\" valign=\"middle\" />\n" //
+ "<td>nicht umsteigen</td>\n" //
+ "<td width=\"1\"> </td>\n" //
+ "\n" //
+ "<td colspan=\"4\">an Neufahrn <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector2=on&amp;sessionID=MVV2_1678243657&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_2&amp;itdLPxx_img=FILELOAD?Filename=mvv2_4C45BE6911.png&amp;itdLPxx_partialRoute=2&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;command=nop&amp;itdLPxx_usage=arrival\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "</td>\n");
assertNotNull(m.group(2)); // departure
assertNotNull(m.group(3)); // product
}
/**
 * Checks that {@code MvvProvider.P_CONNECTION_DETAILS_FINE} matches a trip row whose icon
 * cell ({@code <td width="15" valign="middle"></td>}) is empty, i.e. the fixture contains no
 * {@code images/means} image between the departure and the line/direction cell.
 * <p>
 * The departure time and departure groups are expected to be captured, while the product
 * group is expected to be {@code null}.
 */
@Test
public void tripWithoutProduct()
{
final Matcher m = assertFineConnectionDetails("" //
+ "<td colspan=\"4\">ab 07:46 Niederstraub. Abzw.Krottenthal <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector3=on&amp;sessionID=MVV1_1237094531&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_3&amp;itdLPxx_img=FILELOAD?Filename=mvv1_4C98DF684.png&amp;itdLPxx_partialRoute=4&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;itdLPxx_usage=departure\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "<br />\n" //
+ "</td>\n" //
+ "<td width=\"15\" valign=\"middle\"></td>\n" //
+ "<td width=\"1\" valign=\"middle\" />\n" //
+ "<td>MVV-Ruftaxi 5621 <br />Richtung Taufkirchen (Vils) Busbahnhof</td>\n" //
+ "<td width=\"1\"> </td>\n" //
+ "<td colspan=\"4\">an 07:49 Dickarting <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector3=on&amp;sessionID=MVV1_1237094531&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_3&amp;itdLPxx_img=FILELOAD?Filename=mvv1_4C98DF685.png&amp;itdLPxx_partialRoute=4&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;command=nop&amp;itdLPxx_usage=arrival\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "</td>\n");
assertNotNull(m.group(1)); // departureTime
assertNotNull(m.group(2)); // departure
// the optional product group did not participate in the match, so it is reported as null
assertNull(m.group(3)); // product
}
@Test
public void footway()
{
assertFineConnectionDetails("\n" //
final Matcher m = assertFineConnectionDetails("" //
+ "<td colspan=\"4\">ab München Infanteriestraße 7  <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector5=on&amp;sessionID=MVV2_3994525266&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_5&amp;itdLPxx_img=FILELOAD?Filename=mvv2_4C6916821.png&amp;itdLPxx_partialRoute=1&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;itdLPxx_usage=departure\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "<br />\n" //
+ "</td>\n" //
+ "\n" //
+ "<td width=\"15\" valign=\"middle\">\n" //
+ "<img src=\"images/means/fuss.gif\" alt=\"Fussweg\" />\n" //
+ "</td>\n" //
@ -100,18 +125,18 @@ public class MvvProviderTest
+ " (ca. 3 Minuten)\n" //
+ " </td>\n" //
+ "<td width=\"1\"> </td>\n" //
+ "\n" //
+ "<td colspan=\"4\">an Infanteriestraße Süd  <a class=\"imgLink\" href=\"XSLT_TRIP_REQUEST2?language=de&amp;tripSelector5=on&amp;sessionID=MVV2_3994525266&amp;requestID=1&amp;tripSelection=on&amp;itdLPxx_view=map_5&amp;itdLPxx_img=FILELOAD?Filename=mvv2_4C6916822.png&amp;itdLPxx_partialRoute=1&amp;imageFormat=PNG&amp;imageWidth=400&amp;imageHeight=300&amp;imageOnly=1&amp;imageNoTiles=1&amp;command=nop&amp;itdLPxx_usage=arrival\"><img src=\"images/pdf.gif\" border=\"0\" alt=\"Karte\" /></a>\n" //
+ "</td>\n");
assertNotNull(m.group(8)); // departure
}
@Test
public void footway2()
{
assertFineConnectionDetails("\n" //
final Matcher m = assertFineConnectionDetails("" //
+ "<td colspan=\"4\">ab Weimar Gleis 1<br />\n" //
+ "</td>\n" //
+ "\n" //
+ "<td width=\"15\" valign=\"middle\">\n" //
+ "<img src=\"images/means/fuss.gif\" alt=\"Fussweg\" />\n" //
+ "</td>\n" //
@ -120,14 +145,18 @@ public class MvvProviderTest
+ "(ca. 2 Minuten)\n" //
+ "</td>\n" //
+ "<td width=\"1\"> </td>\n" //
+ "\n" //
+ "<td colspan=\"4\">an Weimar Gleis 2</td>\n");
assertNotNull(m.group(8)); // departure
}
private void assertFineConnectionDetails(String s)
/**
 * Asserts that the given text is matched in its entirety by
 * {@code MvvProvider.P_CONNECTION_DETAILS_FINE}.
 *
 * @param s text to run the pattern against
 * @return the matcher after the {@code matches()} assertion has passed, so that callers can
 *         inspect individual capture groups
 */
private Matcher assertFineConnectionDetails(String s)
{
Matcher m = MvvProvider.P_CONNECTION_DETAILS_FINE.matcher(s);
// matches() only succeeds if the whole input matches, not merely a substring of it
assertTrue(m.matches());
// ParserUtils.printGroups(m);
return m;
}
}