use XML based connection query

git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@714 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
andreas.schildbach@gmail.com 2011-06-21 12:13:03 +00:00
parent b2b0f3f0aa
commit fc7dfcafe7

View file

@ -18,25 +18,16 @@
package de.schildbach.pte; package de.schildbach.pte;
import java.io.IOException; import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import de.schildbach.pte.dto.Connection;
import de.schildbach.pte.dto.GetConnectionDetailsResult;
import de.schildbach.pte.dto.Line;
import de.schildbach.pte.dto.Location; import de.schildbach.pte.dto.Location;
import de.schildbach.pte.dto.LocationType; import de.schildbach.pte.dto.LocationType;
import de.schildbach.pte.dto.NearbyStationsResult; import de.schildbach.pte.dto.NearbyStationsResult;
import de.schildbach.pte.dto.QueryConnectionsResult;
import de.schildbach.pte.dto.QueryDeparturesResult; import de.schildbach.pte.dto.QueryDeparturesResult;
import de.schildbach.pte.exception.SessionExpiredException;
import de.schildbach.pte.util.ParserUtils; import de.schildbach.pte.util.ParserUtils;
/** /**
@ -51,7 +42,7 @@ public class OebbProvider extends AbstractHafasProvider
// Constructor of OebbProvider; it only delegates to the superclass constructor.
// NOTE(review): these rows appear to come from a side-by-side diff, so each line shows the old
// text followed by the new text. The change replaces the first super(...) argument, previously
// null, with API_BASE + "query.exe/dn"; the remaining arguments (12, null) are the same on both
// sides. What the superclass does with these arguments is not visible here.
public OebbProvider() public OebbProvider()
{ {
super(null, 12, null); super(API_BASE + "query.exe/dn", 12, null);
} }
public NetworkId id() public NetworkId id()
@ -72,7 +63,48 @@ public class OebbProvider extends AbstractHafasProvider
// Switches on the positions of productBits that belong to a single product code character.
// Every recognised code sets one or more positions to '1'; the trailing comments name the
// product category behind each position. The code 'C' is accepted but sets nothing, and any
// other character is rejected with an IllegalArgumentException.
// The highest position written is 11, so productBits must already hold at least 12 characters
// (presumably the 12 passed to the super constructor -- confirm).
// NOTE(review): the header, first statement and closing brace rows appear to come from a
// side-by-side diff: the old body (throw new UnsupportedOperationException()) is shown to the
// left of the new if/else chain.
@Override @Override
protected void setProductBits(final StringBuilder productBits, final char product) protected void setProductBits(final StringBuilder productBits, final char product)
{ {
throw new UnsupportedOperationException(); if (product == 'I')
{
productBits.setCharAt(0, '1'); // railjet/ICE
productBits.setCharAt(1, '1'); // ÖBB EC/ÖBB IC
productBits.setCharAt(2, '1'); // EC/IC
}
else if (product == 'R')
{
productBits.setCharAt(3, '1'); // D/EN
productBits.setCharAt(4, '1'); // REX/R
}
else if (product == 'S')
{
productBits.setCharAt(5, '1'); // suburban trains (S-Bahn)
}
else if (product == 'U')
{
productBits.setCharAt(8, '1'); // subway (U-Bahn)
}
else if (product == 'T')
{
productBits.setCharAt(9, '1'); // tram
}
else if (product == 'B')
{
productBits.setCharAt(6, '1'); // buses
}
else if (product == 'P')
{
productBits.setCharAt(11, '1'); // services that must be requested by phone in advance
}
else if (product == 'F')
{
productBits.setCharAt(7, '1'); // ships
}
else if (product == 'C')
{
// 'C' is a known product code, but no position is set for it
}
else
{
throw new IllegalArgumentException("cannot handle: " + product);
}
} }
public NearbyStationsResult queryNearbyStations(final Location location, final int maxDistance, final int maxStations) throws IOException public NearbyStationsResult queryNearbyStations(final Location location, final int maxDistance, final int maxStations) throws IOException
@ -138,366 +170,6 @@ public class OebbProvider extends AbstractHafasProvider
WALKSPEED_MAP.put(WalkSpeed.FAST, "85"); WALKSPEED_MAP.put(WalkSpeed.FAST, "85");
} }
/**
 * Builds the parameter string that is posted to the connection query form.
 * <p>
 * Date and time are rendered in the calendar of {@code timeZone()} and are always formatted with
 * ASCII digits, independent of the default locale of the JVM.
 *
 * @param from
 *            start location, sent as REQ0JourneyStopsS0ID
 * @param via
 *            intermediate location, sent as REQ0JourneyStops1.0ID, or {@code null} for none
 * @param to
 *            target location, sent as REQ0JourneyStopsZ0ID
 * @param date
 *            date and time to query for
 * @param dep
 *            sent as REQ0HafasSearchForw: "1" if {@code true}, "0" otherwise
 * @param products
 *            product code characters to enable, or {@code null} to send no product filter
 * @param walkSpeed
 *            key into WALKSPEED_MAP, sent as REQ0JourneyDep_Foot_speed
 * @return the parameter string, without a leading '?' or '&amp;'
 */
private String connectionsQuery(final Location from, final Location via, final Location to, final Date date, final boolean dep,
		final String products, final WalkSpeed walkSpeed) throws IOException
{
	final Calendar c = new GregorianCalendar(timeZone());
	c.setTime(date);
	final boolean hasVia = via != null;

	final StringBuilder uri = new StringBuilder();
	uri.append("queryPageDisplayed=yes");
	uri.append("&ignoreTypeCheck=yes");
	uri.append("&REQ0JourneyStopsS0ID=").append(ParserUtils.urlEncode(locationId(from), URL_ENCODING));
	if (hasVia)
		uri.append("&REQ0JourneyStops1.0ID=").append(ParserUtils.urlEncode(locationId(via), URL_ENCODING));
	uri.append("&REQ0JourneyStopsZ0ID=").append(ParserUtils.urlEncode(locationId(to), URL_ENCODING));
	// An explicit locale keeps the digits ASCII; the default locale may format %d with other digits.
	uri.append("&REQ0JourneyDate=").append(
			String.format(java.util.Locale.US, "%02d.%02d.%02d", c.get(Calendar.DAY_OF_MONTH), c.get(Calendar.MONTH) + 1,
					c.get(Calendar.YEAR) - 2000));
	uri.append("&wDayExt0=").append(ParserUtils.urlEncode("Mo|Di|Mi|Do|Fr|Sa|So"));
	uri.append("&REQ0JourneyTime=").append(
			String.format(java.util.Locale.US, "%02d:%02d", c.get(Calendar.HOUR_OF_DAY), c.get(Calendar.MINUTE)));
	uri.append("&REQ0HafasSearchForw=").append(dep ? "1" : "0");
	uri.append("&existHafasDemo3=yes");
	uri.append("&REQ0JourneyDep_Foot_speed=").append(WALKSPEED_MAP.get(walkSpeed));
	uri.append("&existBikeEverywhere=yes");
	uri.append("&existHafasAttrInc=yes");
	uri.append("&start=Verbindungen+suchen");

	if (products != null)
	{
		for (final char p : products.toCharArray())
		{
			switch (p)
			{
				case 'I':
					appendProductSections(uri, hasVia, 0, 1, 2);
					break;
				case 'R':
					appendProductSections(uri, hasVia, 3, 4);
					break;
				case 'S':
					appendProductSections(uri, hasVia, 5);
					break;
				case 'U':
					appendProductSections(uri, hasVia, 8);
					break;
				case 'T':
					appendProductSections(uri, hasVia, 9);
					break;
				case 'B':
					appendProductSections(uri, hasVia, 6);
					break;
				case 'P':
					appendProductSections(uri, hasVia, 11);
					break;
				case 'F':
					appendProductSections(uri, hasVia, 7);
					break;
				default:
					// FIXME 'C' has no product section yet; any other character is ignored as well
					break;
			}
		}
	}

	return uri.toString();
}

/**
 * Appends "&amp;REQ0JourneyProduct_prod_section_S_P=1" for every given product index P: first all
 * indexes for section 0 and then, if a via location is present, all indexes for section 1.
 */
private static void appendProductSections(final StringBuilder uri, final boolean withVia, final int... productIndexes)
{
	final int sections = withVia ? 2 : 1;
	for (int section = 0; section < sections; section++)
		for (final int index : productIndexes)
			uri.append("&REQ0JourneyProduct_prod_section_").append(section).append('_').append(index).append("=1");
}
// URL of the query form page; it is scraped first to obtain the form action URL.
private static final String QUERY_CONNECTIONS_FORM_URL = API_BASE + "query.exe/dn?";
// Finds the HFSQuery form on the form page. Group 1 is the action URL up to, but excluding, the '#'.
private static final Pattern P_QUERY_CONNECTIONS_FORM_ACTION = Pattern
.compile("<form id=\"HFSQuery\" action=\"(http://fahrplan\\.oebb\\.at/bin/query\\.exe[^#]*)#");
// Recognises German error texts on a result page. Exactly one group is set per match:
// group 1 = no connection/no route found, group 2 = date before or after the timetable period,
// group 3 = the result is no longer stored on the server.
private static final Pattern P_QUERY_CONNECTIONS_ERROR = Pattern
.compile("(keine Verbindung gefunden|kein Weg gefunden)|(liegt nach dem Ende der Fahrplanperiode|liegt vor Beginn der Fahrplanperiode)|(zwischenzeitlich nicht mehr gespeichert)");
// Finds a <select> element for the start (S0K), target (Z0K) or via (1.0K) stop.
// Group 1 is the name of the select element, group 2 is its inner markup (the options).
private static final Pattern P_PRE_ADDRESS = Pattern.compile(
"<select.*? name=\"(REQ0JourneyStopsS0K|REQ0JourneyStopsZ0K|REQ0JourneyStops1\\.0K)\"[^>]*>\n(.*?)</select>", Pattern.DOTALL);
// Finds a single <option>. Group 1 is the option text without an optional trailing "[...]" part.
private static final Pattern P_ADDRESSES = Pattern.compile("<option[^>]*>\\s*([^<\\[]*)(?:\\[[^\\[]*\\])?\\s*</option>", Pattern.DOTALL);
/**
 * Queries connections by scraping the HTML query form.
 * <p>
 * The form page is fetched first to learn the form action URL. The query is then posted to that
 * URL. If the answer contains selection lists for the start, via or target stop, those candidates
 * are returned instead of connections; otherwise the answer is parsed as a list of connections.
 *
 * @return NO_CONNECTIONS or INVALID_DATE if the page reports the matching error, a result holding
 *         the candidate locations if the input was ambiguous, or the parsed connections
 * @throws IllegalStateException
 *             if the query form cannot be found, or a selection list has an unexpected name
 * @throws SessionExpiredException
 *             if the page reports that the result is no longer stored
 */
@Override
public QueryConnectionsResult queryConnections(final Location from, final Location via, final Location to, final Date date, final boolean dep,
		final String products, final WalkSpeed walkSpeed) throws IOException
{
	// get base url and cookies from form
	final CharSequence form = ParserUtils.scrape(QUERY_CONNECTIONS_FORM_URL, false, null, null, "NSC_");
	final Matcher m = P_QUERY_CONNECTIONS_FORM_ACTION.matcher(form);
	if (!m.find())
		throw new IllegalStateException("cannot find form: '" + form + "' on " + QUERY_CONNECTIONS_FORM_URL);
	final String baseUri = m.group(1);

	// query
	final String query = connectionsQuery(from, via, to, date, dep, products, walkSpeed);
	final CharSequence page = ParserUtils.scrape(baseUri, true, query, null, "NSC_");

	final Matcher mError = P_QUERY_CONNECTIONS_ERROR.matcher(page);
	if (mError.find())
	{
		if (mError.group(1) != null)
			return QueryConnectionsResult.NO_CONNECTIONS;
		if (mError.group(2) != null)
			return QueryConnectionsResult.INVALID_DATE;
		if (mError.group(3) != null)
			throw new SessionExpiredException();
	}

	List<Location> fromAddresses = null;
	List<Location> viaAddresses = null;
	List<Location> toAddresses = null;

	final Matcher mPreAddress = P_PRE_ADDRESS.matcher(page);
	while (mPreAddress.find())
	{
		final String type = mPreAddress.group(1);
		final String options = mPreAddress.group(2);

		final Matcher mAddresses = P_ADDRESSES.matcher(options);
		final List<Location> addresses = new ArrayList<Location>();
		// Duplicates are tracked by address text: the result list holds Location objects, so
		// looking up the plain String in it can never find anything.
		final List<String> seenAddresses = new ArrayList<String>();
		while (mAddresses.find())
		{
			final String address = ParserUtils.resolveEntities(mAddresses.group(1)).trim();
			if (!seenAddresses.contains(address))
			{
				seenAddresses.add(address);
				addresses.add(new Location(LocationType.ANY, 0, null, address + "!"));
			}
		}

		if (type.equals("REQ0JourneyStopsS0K"))
			fromAddresses = addresses;
		else if (type.equals("REQ0JourneyStopsZ0K"))
			toAddresses = addresses;
		else if (type.equals("REQ0JourneyStops1.0K"))
			viaAddresses = addresses;
		else
			throw new IllegalStateException(type);
	}

	if (fromAddresses != null || viaAddresses != null || toAddresses != null)
		return new QueryConnectionsResult(fromAddresses, viaAddresses, toAddresses);
	else
		return queryConnections(baseUri, page);
}
/**
 * Fetches the page behind a follow-up link and parses it as a list of connections.
 *
 * @param uri
 *            link to scrape
 * @return NO_CONNECTIONS or INVALID_DATE if the page reports the matching error, otherwise the
 *         parsed connections
 * @throws SessionExpiredException
 *             if the page reports that the result is no longer stored
 */
@Override
public QueryConnectionsResult queryMoreConnections(final String uri) throws IOException
{
	final CharSequence content = ParserUtils.scrape(uri, false, null, null, "NSC_");

	final Matcher errorMatcher = P_QUERY_CONNECTIONS_ERROR.matcher(content);
	final boolean errorFound = errorMatcher.find();

	// the groups are only consulted after a successful find()
	if (errorFound && errorMatcher.group(1) != null)
		return QueryConnectionsResult.NO_CONNECTIONS;
	if (errorFound && errorMatcher.group(2) != null)
		return QueryConnectionsResult.INVALID_DATE;
	if (errorFound && errorMatcher.group(3) != null)
		throw new SessionExpiredException();

	return queryConnections(uri, content);
}
// Finds the "showAllDetails" link on a result page. Group 1 is the link target.
private static final Pattern P_CONNECTIONS_ALL_DETAILS = Pattern.compile("" //
+ "<a id=\"showAllDetails\" class=\"[^\"]*\" href=\"(http://fahrplan\\.oebb\\.at[^\"]*)\">");
// Matches the head of a details page. Groups: 1 = from, 2 = to, 3 = date (dd.mm.yy),
// 4 = link to earlier connections (optional), 5 = link to later connections (optional).
// NOTE(review): the '?' after "dn" in both link groups is not escaped, so it makes the 'n'
// optional instead of matching a literal '?'; the following ".*?" still consumes the '?'.
private static final Pattern P_CONNECTIONS_HEAD = Pattern.compile(".*?" //
+ "<span class=\"label\">von:</span>\n<span class=\"output\">\\s*(.*?)\\s*</span>.*?" // from
+ "<span class=\"label\">nach:</span>\n<span class=\"output\">\\s*(.*?)\\s*</span>.*?" // to
+ "<span class=\"label\">\nDatum:\n</span>\n<span class=\"output\">.., (\\d{2}\\.\\d{2}\\.\\d{2})</span>.*?" // date
+ "(?:<a href=\"(http://fahrplan\\.oebb\\.at/bin/query\\.exe/dn?.*?&REQ0HafasScrollDir=2)\".*?)?" // linkEarlier
+ "(?:<a href=\"(http://fahrplan\\.oebb\\.at/bin/query\\.exe/dn?.*?&REQ0HafasScrollDir=1)\".*?)?" // linkLater
, Pattern.DOTALL);
// Finds one connection: an overview row followed by the details row carrying the same id
// (enforced by the \1 back reference). Groups: 1 = id, 2 = overview markup, 3 = details markup
// up to the text "Seitenanfang".
private static final Pattern P_CONNECTIONS_COARSE = Pattern.compile("" //
+ "<tr id=\"trOverview(C\\d+-\\d+)\" [^>]*>\n(.*?)</tr>\n" //
+ "<tr class=\"[^\"]*\" id=\"tr\\1\">\n(.*?)Seitenanfang.*?</tr>" //
, Pattern.DOTALL);
// Matches an overview row. Groups: 1 = departure date, 2 = arrival date (only present if it is
// given separately), 3 = departure time, 4 = arrival time.
private static final Pattern P_CONNECTIONS_FINE = Pattern.compile(".*?" //
+ "<td class=\"date\" headers=\"hafasOVDate\"[^>]*>(\\d{2}\\.\\d{2}\\.\\d{2})" // departureDate
+ "(?:<br />(\\d{2}\\.\\d{2}\\.\\d{2}))?.*?" // arrivalDate
+ "(\\d{1,2}:\\d{2}) ab.*?" // departureTime
+ "(\\d{1,2}:\\d{2}) an.*?" // arrivalTime
, Pattern.DOTALL);
// Finds one part of a connection, which spans three consecutive table rows.
// Groups 1 and 2 are the two "tpDetails" rows, group 3 is the "sectionInfo" row.
private static final Pattern P_CONNECTION_DETAILS_COARSE = Pattern.compile("" //
+ "<tr class=\"tpDetails (?:conFirstSecFirstRow|intermediateSection|conLastSecLastRow)\">\n(.*?)</tr>\n" //
+ "<tr class=\"tpDetails (?:conFirstSecFirstRow|intermediateSection|conLastSecLastRow)\">\n(.*?)</tr>\n" //
+ "<tr class=\"tpDetails sectionInfo\">\n(.*?)</tr>\n" //
, Pattern.DOTALL);
// Matches the concatenated rows of one part. Groups: 1 = departure station id (optional),
// 2 = departure name, 3 = departure date (optional), 4 = departure time, 5 = departure platform
// (optional), 6 = line type taken from the product image name, 7 = line (optional),
// 8 = arrival station id (optional), 9 = arrival name, 10 = arrival date (optional),
// 11 = arrival time, 12 = arrival platform (optional), 13 = destination (optional).
private static final Pattern P_CONNECTION_DETAILS_FINE = Pattern.compile(".*?" //
+ "<td class=\"station\">\n?(?:<a href=\"http://fahrplan\\.oebb\\.at/bin/stboard\\.exe/dn.*?input=(\\d+)&[^>]*>)?" // departureId
+ "([^\n<]*).*?" // departure
+ "<td class=\"date\">(?:(\\d{2}\\.\\d{2}\\.\\d{2})|&nbsp;)</td>.*?" // departureDate
+ "<td class=\"timeValue\">\n?<span>ab (\\d{2}:\\d{2}).*?" // departureTime
+ "<td class=\"platform\">\\s*(?:&nbsp;|(.*?))\\s*</td>.*?" // departurePosition
+ "<img class=\"product\" src=\"/img/vs_oebb/(\\w+?)_pic.gif\".*?" // lineType
+ "(?:<a href=\"http://fahrplan\\.oebb\\.at/bin/traininfo\\.exe/dn[^>]*>(.*?)</a>.*?)?" // line
+ "<td class=\"station\">\n?(?:<a href=\"http://fahrplan\\.oebb\\.at/bin/stboard\\.exe/dn.*?input=(\\d+)&[^>]*>)?" // arrivalId
+ "([^\n<]*).*?" // arrival
+ "<td class=\"date\">(?:(\\d{2}\\.\\d{2}\\.\\d{2})|&nbsp;)</td>.*?" // arrivalDate
+ "<td class=\"timeValue\">\n?<span>an (\\d{2}:\\d{2}).*?" // arrivalTime
+ "<td class=\"platform\">\\s*(?:&nbsp;|(.*?))\\s*</td>.*?" // arrivalPosition
+ "<td[^>]* class=\"section_remarks\">(?:.*?Richtung\\:</span>\\s*([^\n]*)\n)?.*?</td>?.*?" // destination
, Pattern.DOTALL);
/**
 * Parses the connections of a result page.
 * <p>
 * The given page is only used to locate its "show all details" link. The page behind that link is
 * fetched and parsed: first the head (from, to, date, link to later connections), then every
 * connection with its overview times and its parts (trips and footways).
 *
 * @param firstUri
 *            address of {@code firstPage}, used in error messages only
 * @param firstPage
 *            result page containing the "show all details" link
 * @return NO_CONNECTIONS or INVALID_DATE if the details page reports the matching error,
 *         otherwise the parsed connections
 * @throws IOException
 *             if the "show all details" link cannot be found, or scraping fails
 * @throws SessionExpiredException
 *             if the details page reports that the result is no longer stored
 * @throws IllegalArgumentException
 *             if the head, an overview row or a part of the details page cannot be parsed
 * @throws IllegalStateException
 *             if a trip lacks the station id of its departure or arrival
 */
private QueryConnectionsResult queryConnections(final String firstUri, final CharSequence firstPage) throws IOException
{
	// ugly workaround to fetch all details
	final Matcher mAllDetailsAction = P_CONNECTIONS_ALL_DETAILS.matcher(firstPage);
	if (!mAllDetailsAction.find())
		throw new IOException("cannot find all details link in '" + firstPage + "' on " + firstUri);
	final String allDetailsUri = mAllDetailsAction.group(1);
	final CharSequence page = ParserUtils.scrape(allDetailsUri, false, null, null, "NSC_");

	final Matcher mError = P_QUERY_CONNECTIONS_ERROR.matcher(page);
	if (mError.find())
	{
		if (mError.group(1) != null)
			return QueryConnectionsResult.NO_CONNECTIONS;
		if (mError.group(2) != null)
			return QueryConnectionsResult.INVALID_DATE;
		if (mError.group(3) != null)
			throw new SessionExpiredException();
	}

	// parse page
	final Matcher mHead = P_CONNECTIONS_HEAD.matcher(page);
	if (!mHead.matches())
		throw new IllegalArgumentException("cannot parse '" + page + "' on " + allDetailsUri);

	final Location from = new Location(LocationType.ANY, 0, null, ParserUtils.resolveEntities(mHead.group(1)));
	final Location to = new Location(LocationType.ANY, 0, null, ParserUtils.resolveEntities(mHead.group(2)));
	// Running calendar for the parts: a part without a date of its own keeps the date parsed last.
	final Calendar time = new GregorianCalendar(timeZone());
	time.clear();
	ParserUtils.parseGermanDate(time, mHead.group(3));
	// group 4 (link to earlier connections) is matched but not used
	final String linkLater = mHead.group(5) != null ? ParserUtils.resolveEntities(mHead.group(5)) : null;

	final List<Connection> connections = new ArrayList<Connection>();
	final Matcher mConCoarse = P_CONNECTIONS_COARSE.matcher(page);
	while (mConCoarse.find())
	{
		final String id = mConCoarse.group(1);
		final String overview = mConCoarse.group(2);
		final String details = mConCoarse.group(3);

		final Matcher mConFine = P_CONNECTIONS_FINE.matcher(overview);
		if (!mConFine.matches())
			throw new IllegalArgumentException("cannot parse '" + overview + "' on " + allDetailsUri);

		final Calendar overviewDepartureTime = new GregorianCalendar(timeZone());
		overviewDepartureTime.clear();
		ParserUtils.parseGermanDate(overviewDepartureTime, mConFine.group(1));
		ParserUtils.parseEuropeanTime(overviewDepartureTime, mConFine.group(3));

		// the arrival starts from the departure and only replaces the date if one is given
		final Calendar overviewArrivalTime = new GregorianCalendar(timeZone());
		overviewArrivalTime.setTimeInMillis(overviewDepartureTime.getTimeInMillis());
		if (mConFine.group(2) != null)
			ParserUtils.parseGermanDate(overviewArrivalTime, mConFine.group(2));
		ParserUtils.parseEuropeanTime(overviewArrivalTime, mConFine.group(4));

		final String link = allDetailsUri; // TODO use print link?

		final Connection connection = new Connection(id, link, overviewDepartureTime.getTime(), overviewArrivalTime.getTime(), from, to,
				new ArrayList<Connection.Part>(1), null, null);
		connections.add(connection);

		final Matcher mDetCoarse = P_CONNECTION_DETAILS_COARSE.matcher(details);
		while (mDetCoarse.find())
		{
			final String set = mDetCoarse.group(1) + mDetCoarse.group(2) + mDetCoarse.group(3);

			final Matcher mDetFine = P_CONNECTION_DETAILS_FINE.matcher(set);
			if (!mDetFine.matches())
				throw new IllegalArgumentException("cannot parse '" + set + "' on " + allDetailsUri);

			final int departureId = mDetFine.group(1) != null ? Integer.parseInt(mDetFine.group(1)) : 0;
			final Location departure = new Location(departureId != 0 ? LocationType.STATION : LocationType.ANY, departureId, null,
					ParserUtils.resolveEntities(mDetFine.group(2)));

			if (mDetFine.group(3) != null)
				ParserUtils.parseGermanDate(time, mDetFine.group(3));
			ParserUtils.parseEuropeanTime(time, mDetFine.group(4));
			final Date detailsDepartureTime = time.getTime();

			final String lineType = mDetFine.group(6);

			final int arrivalId = mDetFine.group(8) != null ? Integer.parseInt(mDetFine.group(8)) : 0;
			final Location arrival = new Location(arrivalId != 0 ? LocationType.STATION : LocationType.ANY, arrivalId, null,
					ParserUtils.resolveEntities(mDetFine.group(9)));

			if (mDetFine.group(10) != null)
				ParserUtils.parseGermanDate(time, mDetFine.group(10));
			ParserUtils.parseEuropeanTime(time, mDetFine.group(11));
			final Date detailsArrivalTime = time.getTime();

			if (!("fuss".equals(lineType) || "transfer".equals(lineType)))
			{
				if (departureId == 0)
					throw new IllegalStateException("departureId");
				final String departurePosition = mDetFine.group(5) != null ? ParserUtils.resolveEntities(mDetFine.group(5)) : null;

				// NOTE(review): group 7 (line) is optional in the pattern and is passed on unchecked
				final String lineStr = normalizeLine(lineType, ParserUtils.resolveEntities(mDetFine.group(7)));
				final Line line = new Line(null, lineStr, lineColors(lineStr));

				if (arrivalId == 0)
					throw new IllegalStateException("arrivalId");
				final String arrivalPosition = mDetFine.group(12) != null ? ParserUtils.resolveEntities(mDetFine.group(12)) : null;

				final Location destination = mDetFine.group(13) != null ? new Location(LocationType.ANY, 0, null,
						ParserUtils.resolveEntities(mDetFine.group(13))) : null;

				final Connection.Trip trip = new Connection.Trip(line, destination, detailsDepartureTime, departurePosition, departure,
						detailsArrivalTime, arrivalPosition, arrival, null, null);
				connection.parts.add(trip);
			}
			else
			{
				// Divide in long arithmetic and narrow afterwards. A cast placed directly in front of
				// the difference would bind tighter than the divisions and truncate the milliseconds.
				final int min = (int) ((detailsArrivalTime.getTime() - detailsDepartureTime.getTime()) / 1000 / 60);

				final Connection.Footway footway = new Connection.Footway(min, departure, arrival, null);
				connection.parts.add(footway);
			}
		}
	}

	return new QueryConnectionsResult(allDetailsUri, from, null, to, linkLater, connections);
}
/**
 * Not supported by this provider.
 *
 * @param connectionUri
 *            ignored
 * @throws UnsupportedOperationException
 *             always
 */
@Override
public GetConnectionDetailsResult getConnectionDetails(final String connectionUri) throws IOException
{
throw new UnsupportedOperationException();
}
// NOTE(review): both rows appear to be unchanged rows of a side-by-side diff, so each
// declaration is shown twice on its line.
// Splits a text at its first '#': group 1 is the part before it, group 2 the remainder.
// Presumably the two parts are line and type, as the name suggests -- the user is not visible here.
private static final Pattern P_NORMALIZE_LINE_AND_TYPE = Pattern.compile("([^#]*)#(.*)"); private static final Pattern P_NORMALIZE_LINE_AND_TYPE = Pattern.compile("([^#]*)#(.*)");
// Describes a run of two to five digits; whether it is applied to a whole text or to a part of it
// depends on the caller, which is not visible here.
private static final Pattern P_NORMALIZE_LINE_NUMBER = Pattern.compile("\\d{2,5}"); private static final Pattern P_NORMALIZE_LINE_NUMBER = Pattern.compile("\\d{2,5}");