Extract HttpClient into its own class, which can keep state (cookies and configuration for now).

This commit is contained in:
Andreas Schildbach 2015-07-02 16:44:51 +02:00
parent 07a0b8b535
commit 00a395a9ad
12 changed files with 431 additions and 386 deletions

View file

@ -79,6 +79,7 @@ import de.schildbach.pte.dto.Trip;
import de.schildbach.pte.dto.Trip.Leg; import de.schildbach.pte.dto.Trip.Leg;
import de.schildbach.pte.exception.InvalidDataException; import de.schildbach.pte.exception.InvalidDataException;
import de.schildbach.pte.exception.ParserException; import de.schildbach.pte.exception.ParserException;
import de.schildbach.pte.util.HttpClient;
import de.schildbach.pte.util.ParserUtils; import de.schildbach.pte.util.ParserUtils;
import de.schildbach.pte.util.XmlPullUtil; import de.schildbach.pte.util.XmlPullUtil;
@ -274,7 +275,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
if (!httpPost) if (!httpPost)
uri.append(parameters); uri.append(parameters);
final CharSequence page = ParserUtils.scrape(uri.toString(), httpPost ? parameters.substring(1) : null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), httpPost ? parameters.substring(1) : null, Charsets.UTF_8);
final ResultHeader header = new ResultHeader(network, SERVER_PRODUCT); final ResultHeader header = new ResultHeader(network, SERVER_PRODUCT);
try try
@ -389,8 +390,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -436,8 +437,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -555,8 +556,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -634,8 +635,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -946,8 +947,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -1509,8 +1510,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -1671,8 +1672,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
pp.setInput(is, null); pp.setInput(is, null);
@ -2134,8 +2135,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
return queryTrips(uri.toString(), is); return queryTrips(uri.toString(), is);
} }
@ -2170,8 +2171,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName); is = httpClient.getInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
return queryTripsMobile(uri.toString(), from, via, to, is); return queryTripsMobile(uri.toString(), from, via, to, is);
} }
@ -2202,8 +2203,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName); is = httpClient.getInputStream(uri.toString(), null, null, httpRefererTrip);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
return queryTrips(uri.toString(), is); return queryTrips(uri.toString(), is);
} }
@ -2234,8 +2235,8 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName); is = httpClient.getInputStream(uri.toString(), null, null, httpRefererTrip);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
is.mark(512); is.mark(512);
return queryTripsMobile(uri.toString(), null, null, null, is); return queryTripsMobile(uri.toString(), null, null, null, is);

View file

@ -77,6 +77,7 @@ import de.schildbach.pte.dto.SuggestedLocation;
import de.schildbach.pte.dto.Trip; import de.schildbach.pte.dto.Trip;
import de.schildbach.pte.exception.ParserException; import de.schildbach.pte.exception.ParserException;
import de.schildbach.pte.exception.SessionExpiredException; import de.schildbach.pte.exception.SessionExpiredException;
import de.schildbach.pte.util.HttpClient;
import de.schildbach.pte.util.LittleEndianDataInputStream; import de.schildbach.pte.util.LittleEndianDataInputStream;
import de.schildbach.pte.util.ParserUtils; import de.schildbach.pte.util.ParserUtils;
import de.schildbach.pte.util.StringReplaceReader; import de.schildbach.pte.util.StringReplaceReader;
@ -371,7 +372,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
protected final SuggestLocationsResult jsonGetStops(final String uri) throws IOException protected final SuggestLocationsResult jsonGetStops(final String uri) throws IOException
{ {
final CharSequence page = ParserUtils.scrape(uri, null, jsonGetStopsEncoding); final CharSequence page = httpClient.get(uri, null, jsonGetStopsEncoding);
final Matcher mJson = P_AJAX_GET_STOPS_JSON.matcher(page); final Matcher mJson = P_AJAX_GET_STOPS_JSON.matcher(page);
if (mJson.matches()) if (mJson.matches())
@ -508,7 +509,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try try
{ {
// work around unparsable XML // work around unparsable XML
reader = new StringReplaceReader(new InputStreamReader(ParserUtils.scrapeInputStream(uri), Charsets.ISO_8859_1), " & ", " &amp; "); reader = new StringReplaceReader(new InputStreamReader(httpClient.getInputStream(uri), Charsets.ISO_8859_1), " & ", " &amp; ");
reader.replace("<b>", " "); reader.replace("<b>", " ");
reader.replace("</b>", " "); reader.replace("</b>", " ");
reader.replace("<u>", " "); reader.replace("<u>", " ");
@ -885,8 +886,8 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try try
{ {
final String endpoint = extXmlEndpoint != null ? extXmlEndpoint : queryEndpoint; final String endpoint = extXmlEndpoint != null ? extXmlEndpoint : queryEndpoint;
final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, sessionCookieName); final InputStream is = httpClient.getInputStream(endpoint, request, null, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = HttpClient.peekFirstChars(is);
reader = new InputStreamReader(is, Charsets.ISO_8859_1); reader = new InputStreamReader(is, Charsets.ISO_8859_1);
final XmlPullParserFactory factory = XmlPullParserFactory.newInstance(System.getProperty(XmlPullParserFactory.PROPERTY_NAME), null); final XmlPullParserFactory factory = XmlPullParserFactory.newInstance(System.getProperty(XmlPullParserFactory.PROPERTY_NAME), null);
@ -1531,8 +1532,8 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try try
{ {
final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri, sessionCookieName)); final CustomBufferedInputStream bis = new CustomBufferedInputStream(httpClient.getInputStream(uri));
final String firstChars = ParserUtils.peekFirstChars(bis); final String firstChars = HttpClient.peekFirstChars(bis);
// initialize input stream // initialize input stream
is = new LittleEndianDataInputStream(bis); is = new LittleEndianDataInputStream(bis);
@ -2356,7 +2357,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
protected final NearbyLocationsResult xmlNearbyStations(final String uri) throws IOException protected final NearbyLocationsResult xmlNearbyStations(final String uri) throws IOException
{ {
// scrape page // scrape page
final CharSequence page = ParserUtils.scrape(uri); final CharSequence page = httpClient.get(uri);
final List<Location> stations = new ArrayList<Location>(); final List<Location> stations = new ArrayList<Location>();
@ -2441,7 +2442,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
protected final NearbyLocationsResult jsonNearbyLocations(final String uri) throws IOException protected final NearbyLocationsResult jsonNearbyLocations(final String uri) throws IOException
{ {
final CharSequence page = ParserUtils.scrape(uri, null, jsonNearbyLocationsEncoding); final CharSequence page = httpClient.get(uri, null, jsonNearbyLocationsEncoding);
try try
{ {
@ -2526,7 +2527,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
{ {
final List<Location> stations = new ArrayList<Location>(); final List<Location> stations = new ArrayList<Location>();
final CharSequence page = ParserUtils.scrape(uri); final CharSequence page = httpClient.get(uri);
String oldZebra = null; String oldZebra = null;
final Matcher mCoarse = htmlNearbyStationsPattern.matcher(page); final Matcher mCoarse = htmlNearbyStationsPattern.matcher(page);

View file

@ -661,7 +661,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
private JSONObject getLinePhysicalMode(final String lineId) throws IOException private JSONObject getLinePhysicalMode(final String lineId) throws IOException
{ {
final String uri = uri() + "lines/" + ParserUtils.urlEncode(lineId) + "/physical_modes"; final String uri = uri() + "lines/" + ParserUtils.urlEncode(lineId) + "/physical_modes";
final CharSequence page = ParserUtils.scrape(uri, authorization); final CharSequence page = httpClient.get(uri, authorization);
try try
{ {
@ -700,7 +700,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
private List<LineDestination> getStationLines(final String stopPointId) throws IOException private List<LineDestination> getStationLines(final String stopPointId) throws IOException
{ {
final String uri = uri() + "stop_points/" + ParserUtils.urlEncode(stopPointId) + "/routes?depth=2"; final String uri = uri() + "stop_points/" + ParserUtils.urlEncode(stopPointId) + "/routes?depth=2";
final CharSequence page = ParserUtils.scrape(uri, authorization); final CharSequence page = httpClient.get(uri, authorization);
try try
{ {
@ -727,7 +727,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
private String getStopAreaId(final String stopPointId) throws IOException private String getStopAreaId(final String stopPointId) throws IOException
{ {
final String uri = uri() + "stop_points/" + ParserUtils.urlEncode(stopPointId) + "?depth=1"; final String uri = uri() + "stop_points/" + ParserUtils.urlEncode(stopPointId) + "?depth=1";
final CharSequence page = ParserUtils.scrape(uri, authorization); final CharSequence page = httpClient.get(uri, authorization);
try try
{ {
@ -794,7 +794,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
final String queryUri = uri() + queryUriType + "places_nearby?type[]=stop_point" + "&distance=" + maxDistance + "&count=" + maxLocations final String queryUri = uri() + queryUriType + "places_nearby?type[]=stop_point" + "&distance=" + maxDistance + "&count=" + maxLocations
+ "&depth=0"; + "&depth=0";
final CharSequence page = ParserUtils.scrape(queryUri, authorization); final CharSequence page = httpClient.get(queryUri, authorization);
try try
{ {
@ -871,7 +871,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
} }
queryUri.append("departures?from_datetime=" + dateTime + "&count=" + maxDepartures + "&duration=3600" + "&depth=0"); queryUri.append("departures?from_datetime=" + dateTime + "&count=" + maxDepartures + "&duration=3600" + "&depth=0");
final CharSequence page = ParserUtils.scrape(queryUri.toString(), authorization); final CharSequence page = httpClient.get(queryUri.toString(), authorization);
final JSONObject head = new JSONObject(page.toString()); final JSONObject head = new JSONObject(page.toString());
@ -960,7 +960,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
final String nameCstr = constraint.toString(); final String nameCstr = constraint.toString();
final String queryUri = uri() + "places?q=" + ParserUtils.urlEncode(nameCstr) + "&type[]=stop_area&type[]=address" + "&depth=1"; final String queryUri = uri() + "places?q=" + ParserUtils.urlEncode(nameCstr) + "&type[]=stop_area&type[]=address" + "&depth=1";
final CharSequence page = ParserUtils.scrape(queryUri, authorization); final CharSequence page = httpClient.get(queryUri, authorization);
try try
{ {
@ -1081,7 +1081,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
} }
} }
final CharSequence page = ParserUtils.scrape(queryUri.toString(), authorization); final CharSequence page = httpClient.get(queryUri.toString(), authorization);
try try
{ {
@ -1177,7 +1177,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
final Location from = context.from; final Location from = context.from;
final Location to = context.to; final Location to = context.to;
final String queryUri = later ? context.nextQueryUri : context.prevQueryUri; final String queryUri = later ? context.nextQueryUri : context.prevQueryUri;
final CharSequence page = ParserUtils.scrape(queryUri, authorization); final CharSequence page = httpClient.get(queryUri, authorization);
try try
{ {
@ -1214,7 +1214,7 @@ public abstract class AbstractNavitiaProvider extends AbstractNetworkProvider
public Point[] getArea() throws IOException public Point[] getArea() throws IOException
{ {
final String queryUri = uri(); final String queryUri = uri();
final CharSequence page = ParserUtils.scrape(queryUri, authorization); final CharSequence page = httpClient.get(queryUri, authorization);
try try
{ {

View file

@ -33,6 +33,7 @@ import de.schildbach.pte.dto.Point;
import de.schildbach.pte.dto.Position; import de.schildbach.pte.dto.Position;
import de.schildbach.pte.dto.Product; import de.schildbach.pte.dto.Product;
import de.schildbach.pte.dto.Style; import de.schildbach.pte.dto.Style;
import de.schildbach.pte.util.HttpClient;
/** /**
* @author Andreas Schildbach * @author Andreas Schildbach
@ -40,11 +41,11 @@ import de.schildbach.pte.dto.Style;
public abstract class AbstractNetworkProvider implements NetworkProvider public abstract class AbstractNetworkProvider implements NetworkProvider
{ {
protected final NetworkId network; protected final NetworkId network;
protected final HttpClient httpClient = new HttpClient();
protected TimeZone timeZone = TimeZone.getTimeZone("CET"); protected TimeZone timeZone = TimeZone.getTimeZone("CET");
protected int numTripsRequested = 6; protected int numTripsRequested = 6;
private @Nullable Map<String, Style> styles = null; private @Nullable Map<String, Style> styles = null;
protected @Nullable String sessionCookieName = null;
protected static final Set<Product> ALL_EXCEPT_HIGHSPEED = EnumSet.complementOf(EnumSet.of(Product.HIGH_SPEED_TRAIN)); protected static final Set<Product> ALL_EXCEPT_HIGHSPEED = EnumSet.complementOf(EnumSet.of(Product.HIGH_SPEED_TRAIN));
@ -74,6 +75,11 @@ public abstract class AbstractNetworkProvider implements NetworkProvider
return ALL_EXCEPT_HIGHSPEED; return ALL_EXCEPT_HIGHSPEED;
} }
public void setUserAgent(final String userAgent)
{
httpClient.setUserAgent(userAgent);
}
protected void setTimeZone(final String timeZoneId) protected void setTimeZone(final String timeZoneId)
{ {
this.timeZone = TimeZone.getTimeZone(timeZoneId); this.timeZone = TimeZone.getTimeZone(timeZoneId);
@ -91,7 +97,7 @@ public abstract class AbstractNetworkProvider implements NetworkProvider
protected void setSessionCookieName(final String sessionCookieName) protected void setSessionCookieName(final String sessionCookieName)
{ {
this.sessionCookieName = sessionCookieName; httpClient.setSessionCookieName(sessionCookieName);
} }
private static final char STYLES_SEP = '|'; private static final char STYLES_SEP = '|';

View file

@ -208,7 +208,7 @@ public abstract class AbstractTsiProvider extends AbstractNetworkProvider
final StringBuilder uri = new StringBuilder(stopFinderEndpoint); final StringBuilder uri = new StringBuilder(stopFinderEndpoint);
uri.append(parameters); uri.append(parameters);
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
final List<SuggestedLocation> locations = new ArrayList<SuggestedLocation>(); final List<SuggestedLocation> locations = new ArrayList<SuggestedLocation>();
@ -299,7 +299,7 @@ public abstract class AbstractTsiProvider extends AbstractNetworkProvider
final StringBuilder uri = new StringBuilder(stopFinderEndpoint); final StringBuilder uri = new StringBuilder(stopFinderEndpoint);
uri.append(parameters); uri.append(parameters);
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
final List<Location> stations = new ArrayList<Location>(); final List<Location> stations = new ArrayList<Location>();
@ -344,7 +344,7 @@ public abstract class AbstractTsiProvider extends AbstractNetworkProvider
final StringBuilder uri = new StringBuilder(stopFinderEndpoint); final StringBuilder uri = new StringBuilder(stopFinderEndpoint);
uri.append(parameters); uri.append(parameters);
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
final JSONObject head = new JSONObject(page.toString()); final JSONObject head = new JSONObject(page.toString());
@ -761,7 +761,7 @@ public abstract class AbstractTsiProvider extends AbstractNetworkProvider
final StringBuilder uri = new StringBuilder(tripEndpoint); final StringBuilder uri = new StringBuilder(tripEndpoint);
uri.append(parameters); uri.append(parameters);
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8, null); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
final JSONObject head = new JSONObject(page.toString()); final JSONObject head = new JSONObject(page.toString());

View file

@ -186,7 +186,7 @@ public class InvgProvider extends AbstractHafasProvider
// scrape page // scrape page
final StringBuilder uri = new StringBuilder(stationBoardEndpoint); final StringBuilder uri = new StringBuilder(stationBoardEndpoint);
appendXmlStationBoardParameters(uri, time, stationId, maxDepartures, false, null); appendXmlStationBoardParameters(uri, time, stationId, maxDepartures, false, null);
final CharSequence page = ParserUtils.scrape(uri.toString()); final CharSequence page = httpClient.get(uri.toString());
// parse page // parse page
final Matcher mHeadCoarse = P_DEPARTURES_HEAD_COARSE.matcher(page); final Matcher mHeadCoarse = P_DEPARTURES_HEAD_COARSE.matcher(page);

View file

@ -173,7 +173,7 @@ public class SeptaProvider extends AbstractHafasProvider
// scrape page // scrape page
final StringBuilder uri = new StringBuilder(stationBoardEndpoint); final StringBuilder uri = new StringBuilder(stationBoardEndpoint);
appendXmlStationBoardParameters(uri, time, stationId, maxDepartures, false, null); appendXmlStationBoardParameters(uri, time, stationId, maxDepartures, false, null);
final CharSequence page = ParserUtils.scrape(uri.toString()); final CharSequence page = httpClient.get(uri.toString());
// parse page // parse page
final Matcher mPageCoarse = P_DEPARTURES_PAGE_COARSE.matcher(page); final Matcher mPageCoarse = P_DEPARTURES_PAGE_COARSE.matcher(page);

View file

@ -392,7 +392,7 @@ public class VrsProvider extends AbstractNetworkProvider
uri.append("&s=").append(Math.min(16, maxLocations)); // artificial server limit uri.append("&s=").append(Math.min(16, maxLocations)); // artificial server limit
} }
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
@ -456,7 +456,7 @@ public class VrsProvider extends AbstractNetworkProvider
uri.append("&t="); uri.append("&t=");
appendDate(uri, time); appendDate(uri, time);
} }
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
@ -554,7 +554,7 @@ public class VrsProvider extends AbstractNetworkProvider
final StringBuilder uri = new StringBuilder(API_BASE); final StringBuilder uri = new StringBuilder(API_BASE);
uri.append("?eID=tx_vrsinfo_his_info&i=").append(ParserUtils.urlEncode(stationId)); uri.append("?eID=tx_vrsinfo_his_info&i=").append(ParserUtils.urlEncode(stationId));
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {
@ -623,7 +623,7 @@ public class VrsProvider extends AbstractNetworkProvider
final String uri = API_BASE + "?eID=tx_vrsinfo_ass2_objects&sc=" + sc + "&ac=" + ac + "&pc=" + ac + "&t=sap&q=" final String uri = API_BASE + "?eID=tx_vrsinfo_ass2_objects&sc=" + sc + "&ac=" + ac + "&pc=" + ac + "&t=sap&q="
+ ParserUtils.urlEncode(new Location(LocationType.ANY, null, null, constraint.toString()).name); + ParserUtils.urlEncode(new Location(LocationType.ANY, null, null, constraint.toString()).name);
final CharSequence page = ParserUtils.scrape(uri, null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri, null, Charsets.UTF_8);
try try
{ {
@ -751,7 +751,7 @@ public class VrsProvider extends AbstractNetworkProvider
uri.append("p"); uri.append("p");
} }
final CharSequence page = ParserUtils.scrape(uri.toString(), null, Charsets.UTF_8); final CharSequence page = httpClient.get(uri.toString(), null, Charsets.UTF_8);
try try
{ {

View file

@ -21,7 +21,7 @@ import java.io.IOException;
import java.io.Reader; import java.io.Reader;
import java.net.URL; import java.net.URL;
import de.schildbach.pte.util.ParserUtils; import de.schildbach.pte.util.HttpClient;
/** /**
* @author Andreas Schildbach * @author Andreas Schildbach
@ -61,8 +61,8 @@ public abstract class AbstractHttpException extends ParserException
if (errorReader == null) if (errorReader == null)
return null; return null;
final StringBuilder error = new StringBuilder(ParserUtils.SCRAPE_INITIAL_CAPACITY); final StringBuilder error = new StringBuilder(HttpClient.SCRAPE_INITIAL_CAPACITY);
ParserUtils.copy(errorReader, error); HttpClient.copy(errorReader, error);
errorReader.close(); errorReader.close();
return error; return error;

View file

@ -0,0 +1,355 @@
/*
* Copyright 2015 the original author or authors.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.schildbach.pte.util;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.charset.Charset;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import javax.annotation.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Charsets;
import de.schildbach.pte.exception.BlockedException;
import de.schildbach.pte.exception.InternalErrorException;
import de.schildbach.pte.exception.NotFoundException;
import de.schildbach.pte.exception.SessionExpiredException;
import de.schildbach.pte.exception.UnexpectedRedirectException;
/**
* @author Andreas Schildbach
*/
public final class HttpClient
{
// Value for the User-Agent request header; no such header is added while this is null.
@Nullable
private String userAgent = null;
// Name of the cookie that is remembered across requests; cookie handling is inactive while this is null.
@Nullable
private String sessionCookieName = null;
// Most recently received cookie whose name matched sessionCookieName, or null if none was received yet.
@Nullable
private HttpCookie sessionCookie = null;
// Value sent as the Accept request header with every request.
private static final String SCRAPE_ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
// Initial capacity of the StringBuilder that collects a response body.
public static final int SCRAPE_INITIAL_CAPACITY = 4096;
// Size of the char buffer used by copy().
private static final int SCRAPE_COPY_SIZE = 2048;
// NOTE(review): presumably the number of bytes inspected by peekFirstChars() -- confirm against that method.
private static final int SCRAPE_PEEK_SIZE = 4096;
// Passed to HttpURLConnection.setConnectTimeout(), i.e. milliseconds.
private static final int SCRAPE_CONNECT_TIMEOUT = 5000;
// Passed to HttpURLConnection.setReadTimeout(), i.e. milliseconds.
private static final int SCRAPE_READ_TIMEOUT = 15000;
private static final Logger log = LoggerFactory.getLogger(HttpClient.class);
/**
 * Sets the value sent as the {@code User-Agent} request header.
 *
 * @param userAgent
 *            user agent string; while it is {@code null}, this client adds no {@code User-Agent} header
 */
public void setUserAgent(final String userAgent)
{
this.userAgent = userAgent;
}
/**
 * Sets the name of the cookie that this client treats as the session cookie. Once set, a cookie with that
 * name received in a successful response is remembered and sent along with later requests.
 *
 * @param sessionCookieName
 *            name of the session cookie; while it is {@code null}, no cookie is stored
 */
public void setSessionCookieName(final String sessionCookieName)
{
this.sessionCookieName = sessionCookieName;
}
/**
 * Fetches the given URL with a GET request, without an {@code Authorization} header, and returns the
 * response body as text.
 *
 * @param url
 *            address to fetch
 * @return the response body
 * @throws IOException
 *             if the request fails
 */
public CharSequence get(final String url) throws IOException
{
// typed local keeps overload resolution on the (String, String) variant
final String noAuthorization = null;
return get(url, noAuthorization);
}
/**
 * Fetches the given URL with a GET request and returns the response body as text. No request encoding is
 * passed on, so the default encoding applies.
 *
 * @param url
 *            address to fetch
 * @param authorization
 *            value for the {@code Authorization} request header, or {@code null} to send none
 * @return the response body
 * @throws IOException
 *             if the request fails
 */
public CharSequence get(final String url, final String authorization) throws IOException
{
final String noPostRequest = null;
final Charset defaultEncoding = null;
return get(url, noPostRequest, defaultEncoding, authorization);
}
/**
 * Fetches the given URL without an {@code Authorization} header and returns the response body as text.
 *
 * @param urlStr
 *            address to fetch
 * @param postRequest
 *            request body to send with POST, or {@code null} to issue a GET
 * @param requestEncoding
 *            charset handed on to the request; {@code null} selects the default
 * @return the response body
 * @throws IOException
 *             if the request fails
 */
public CharSequence get(final String urlStr, final String postRequest, final Charset requestEncoding) throws IOException
{
final String noAuthorization = null;
return get(urlStr, postRequest, requestEncoding, noAuthorization);
}
/**
 * Performs the request and reads the complete response body into memory.
 *
 * @param urlStr
 *            address to fetch
 * @param postRequest
 *            request body to send with POST, or {@code null} to issue a GET
 * @param requestEncoding
 *            charset used for the request and for decoding the response body; {@code null} means ISO-8859-1
 * @param authorization
 *            value for the {@code Authorization} request header, or {@code null} to send none
 * @return the decoded response body
 * @throws IOException
 *             if the request or reading the response fails
 */
private CharSequence get(final String urlStr, final String postRequest, Charset requestEncoding, final String authorization) throws IOException
{
if (requestEncoding == null)
requestEncoding = Charsets.ISO_8859_1;
final StringBuilder buffer = new StringBuilder(SCRAPE_INITIAL_CAPACITY);
final InputStream is = getInputStream(urlStr, postRequest, requestEncoding, null, authorization);
final Reader pageReader = new InputStreamReader(is, requestEncoding);
try
{
copy(pageReader, buffer);
}
finally
{
// close even when reading fails half-way, so the underlying connection stream is not leaked
pageReader.close();
}
return buffer;
}
/**
 * Opens the given URL with a GET request, using the default encoding and no {@code Referer} header, and
 * returns the response body as a stream.
 *
 * @param url
 *            address to fetch
 * @return stream over the response body
 * @throws IOException
 *             if the request fails
 */
public InputStream getInputStream(final String url) throws IOException
{
final String noPostRequest = null;
final Charset defaultEncoding = null;
final String noReferer = null;
return getInputStream(url, noPostRequest, defaultEncoding, noReferer);
}
/**
 * Opens the given URL without an {@code Authorization} header and returns the response body as a stream.
 *
 * @param urlStr
 *            address to fetch
 * @param postRequest
 *            request body to send with POST, or {@code null} to issue a GET
 * @param requestEncoding
 *            charset handed on to the request; {@code null} selects the default
 * @param referer
 *            value for the {@code Referer} request header, or {@code null} to send none
 * @return stream over the response body
 * @throws IOException
 *             if the request fails
 */
public InputStream getInputStream(final String urlStr, final String postRequest, final Charset requestEncoding, final String referer)
throws IOException
{
final String noAuthorization = null;
return getInputStream(urlStr, postRequest, requestEncoding, referer, noAuthorization);
}
/**
 * Performs an HTTP request and returns the response body as a stream.
 * <p>
 * The request is a POST when {@code postRequest} is given and a GET otherwise. Every request carries
 * {@code Accept}, {@code Accept-Encoding: gzip} and {@code Cache-Control: no-cache} headers; the
 * {@code User-Agent}, {@code Referer}, {@code Cookie} and {@code Authorization} headers are added only when
 * the corresponding value is available. The stored session cookie is sent only if its name equals the
 * configured session cookie name.
 * <p>
 * On a 200 response the beginning of the body is checked for redirect, session-expiry and internal-error
 * markers before the stream is returned, and a matching session cookie from the response is remembered.
 * The status codes 400, 401, 403, 404, 406, 500 and 503 as well as 301 and 302 are turned into the
 * matching exception right away. Any other status code is retried up to three times before giving up.
 *
 * @param urlStr
 *            address to fetch
 * @param postRequest
 *            form-encoded request body to send with POST, or {@code null} to issue a GET
 * @param requestEncoding
 *            charset used to encode the POST body and to decode error bodies; {@code null} means ISO-8859-1
 * @param referer
 *            value for the {@code Referer} request header, or {@code null} to send none
 * @param authorization
 *            value for the {@code Authorization} request header, or {@code null} to send none
 * @return buffered stream over the response body
 * @throws IOException
 *             if the request fails or keeps returning an unhandled status code
 */
public InputStream getInputStream(final String urlStr, final String postRequest, Charset requestEncoding, final String referer,
final String authorization) throws IOException
{
log.debug("{}: {}", postRequest != null ? "POST" : "GET", urlStr);
if (requestEncoding == null)
requestEncoding = Charsets.ISO_8859_1;
int tries = 3;
while (true)
{
final URL url = new URL(urlStr);
final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setDoInput(true);
connection.setDoOutput(postRequest != null);
connection.setConnectTimeout(SCRAPE_CONNECT_TIMEOUT);
connection.setReadTimeout(SCRAPE_READ_TIMEOUT);
if (userAgent != null)
connection.addRequestProperty("User-Agent", userAgent);
connection.addRequestProperty("Accept", SCRAPE_ACCEPT);
connection.addRequestProperty("Accept-Encoding", "gzip");
// workaround to disable Vodafone compression
connection.addRequestProperty("Cache-Control", "no-cache");
if (referer != null)
connection.addRequestProperty("Referer", referer);
// read the field once so the null check and the use see the same cookie
final HttpCookie sessionCookie = this.sessionCookie;
if (sessionCookie != null && sessionCookie.getName().equals(sessionCookieName))
connection.addRequestProperty("Cookie", sessionCookie.toString());
// Set authorization.
if (authorization != null)
connection.addRequestProperty("Authorization", authorization);
if (postRequest != null)
{
final byte[] postRequestBytes = postRequest.getBytes(requestEncoding);
connection.setRequestMethod("POST");
connection.addRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.addRequestProperty("Content-Length", Integer.toString(postRequestBytes.length));
final OutputStream os = connection.getOutputStream();
try
{
os.write(postRequestBytes);
}
finally
{
// release the request stream even if writing the body fails
os.close();
}
}
final int responseCode = connection.getResponseCode();
if (responseCode == HttpURLConnection.HTTP_OK)
{
final String contentType = connection.getContentType();
final String contentEncoding = connection.getContentEncoding();
InputStream is = new BufferedInputStream(connection.getInputStream());
if ("gzip".equalsIgnoreCase(contentEncoding) || "application/octet-stream".equalsIgnoreCase(contentType))
is = wrapGzip(is);
// a transparently followed redirect to another host is treated as an error
if (!url.getHost().equals(connection.getURL().getHost()))
throw new UnexpectedRedirectException(url, connection.getURL());
final String firstChars = peekFirstChars(is);
final URL redirectUrl = testRedirect(url, firstChars);
if (redirectUrl != null)
throw new UnexpectedRedirectException(url, redirectUrl);
if (testExpired(firstChars))
throw new SessionExpiredException();
if (testInternalError(firstChars))
throw new InternalErrorException(url, new InputStreamReader(is, requestEncoding));
// save cookie
if (sessionCookieName != null)
{
c: for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet())
{
if ("set-cookie".equalsIgnoreCase(entry.getKey()) || "set-cookie2".equalsIgnoreCase(entry.getKey()))
{
for (final String value : entry.getValue())
{
for (final HttpCookie cookie : HttpCookie.parse(value))
{
if (cookie.getName().equals(sessionCookieName))
{
// the first matching cookie wins; stop scanning all headers
this.sessionCookie = cookie;
break c;
}
}
}
}
}
}
return is;
}
else if (responseCode == HttpURLConnection.HTTP_BAD_REQUEST || responseCode == HttpURLConnection.HTTP_UNAUTHORIZED
|| responseCode == HttpURLConnection.HTTP_FORBIDDEN || responseCode == HttpURLConnection.HTTP_NOT_ACCEPTABLE
|| responseCode == HttpURLConnection.HTTP_UNAVAILABLE)
{
throw new BlockedException(url, openErrorReader(connection, requestEncoding));
}
else if (responseCode == HttpURLConnection.HTTP_NOT_FOUND)
{
throw new NotFoundException(url, openErrorReader(connection, requestEncoding));
}
else if (responseCode == HttpURLConnection.HTTP_MOVED_PERM || responseCode == HttpURLConnection.HTTP_MOVED_TEMP)
{
throw new UnexpectedRedirectException(url, connection.getURL());
}
else if (responseCode == HttpURLConnection.HTTP_INTERNAL_ERROR)
{
throw new InternalErrorException(url, openErrorReader(connection, requestEncoding));
}
else
{
final String message = "got response: " + responseCode + " " + connection.getResponseMessage();
if (tries-- > 0)
log.info("{}, retrying...", message);
else
throw new IOException(message + ": " + url);
}
}
}
/**
 * Wraps the error stream of the connection in a reader. {@link HttpURLConnection#getErrorStream()} returns
 * {@code null} when the server sent no error body; in that case {@code null} is returned instead of
 * failing with a {@link NullPointerException}, so the HTTP exception can still be thrown.
 *
 * @return reader over the error body, or {@code null} if there is none
 */
private static Reader openErrorReader(final HttpURLConnection connection, final Charset encoding)
{
final InputStream errorStream = connection.getErrorStream();
return errorStream != null ? new InputStreamReader(errorStream, encoding) : null;
}
/**
 * Drains the given reader into the given builder, chunk by chunk.
 * The reader is read until it signals end of stream; it is not closed here.
 *
 * @param reader
 *            source of characters
 * @param builder
 *            destination the characters are appended to
 * @return total number of characters appended
 * @throws IOException
 *             if reading from the reader fails
 */
public static final long copy(final Reader reader, final StringBuilder builder) throws IOException
{
    final char[] chunk = new char[SCRAPE_COPY_SIZE];
    long total = 0;
    // Reader.read() returns -1 once the end of the stream has been reached
    for (int len = reader.read(chunk); len != -1; len = reader.read(chunk))
    {
        builder.append(chunk, 0, len);
        total += len;
    }
    return total;
}
/**
 * Wraps the stream in as many gzip decoders as its content calls for (none, one or two).
 * The decision is made by sniffing the two-byte gzip magic number, first on the raw stream and
 * then again on the once-inflated stream to catch double-gzipped responses.
 *
 * @param is
 *            stream to inspect; it is marked and reset here
 * @return the given stream itself if it is not gzipped, otherwise a buffered stream delivering the
 *         inflated content
 * @throws IOException
 *             if sniffing the stream or reading the gzip header fails
 */
private static InputStream wrapGzip(final InputStream is) throws IOException
{
    // uncompressed
    if (!startsWithGzipMagic(is))
        return is;

    final BufferedInputStream inflatedOnce = new BufferedInputStream(new GZIPInputStream(is));

    // gzipped
    if (!startsWithGzipMagic(inflatedOnce))
        return inflatedOnce;

    // double gzipped
    return new BufferedInputStream(new GZIPInputStream(inflatedOnce));
}

/**
 * Peeks at the first two bytes of the stream and tells whether they are the gzip magic number
 * (0x1f 0x8b). The stream is rewound to where it was before returning.
 */
private static boolean startsWithGzipMagic(final InputStream in) throws IOException
{
    in.mark(2);
    final int first = in.read();
    final int second = in.read();
    in.reset();
    return first == 0x1f && second == 0x8b;
}
/**
 * Reads up to {@code SCRAPE_PEEK_SIZE} bytes from the start of the stream, rewinds the stream and
 * returns the bytes as text for content sniffing.
 * <p>
 * The bytes are always decoded as UTF-8 rather than with the platform default charset, so the
 * result does not depend on the JVM the code runs on. Bytes that are not valid UTF-8 (for example
 * ISO-8859-1 umlauts) decode to a single replacement character each.
 * <p>
 * The stream is expected to support mark/reset; {@code mark()} and {@code reset()} are called on it
 * unconditionally.
 *
 * @param is
 *            stream to peek into
 * @return the decoded first bytes with all characters of Unicode category "Other" ({@code \p{C}})
 *         removed, or an empty string if the stream is already at its end
 * @throws IOException
 *             if reading or resetting the stream fails
 */
public static String peekFirstChars(final InputStream is) throws IOException
{
    is.mark(SCRAPE_PEEK_SIZE);
    final byte[] firstBytes = new byte[SCRAPE_PEEK_SIZE];
    final int read = is.read(firstBytes);
    // nothing was consumed at end of stream, so there is nothing to rewind
    if (read == -1)
        return "";
    is.reset();
    // explicit charset: the no-charset String constructor would use the platform default
    return new String(firstBytes, 0, read, java.nio.charset.Charset.forName("UTF-8")).replaceAll("\\p{C}", "");
}
// <meta http-equiv="refresh" content="N;URL=..."> — group 1 is the redirect target
private static final Pattern P_REDIRECT_HTTP_EQUIV = Pattern.compile("<META\\s+http-equiv=\"?refresh\"?\\s+content=\"\\d+;\\s*URL=([^\"]+)\"",
        Pattern.CASE_INSENSITIVE);
// <script ...>window.location = "..." or location.href = "..." — group 1 is the redirect target.
// The dots are escaped so that only a literal '.' is accepted between the identifiers.
private static final Pattern P_REDIRECT_SCRIPT = Pattern.compile(
        "<script\\s+(?:type=\"text/javascript\"|language=\"javascript\")>\\s*(?:window\\.location|location\\.href)\\s*=\\s*\"([^\"]+)\"",
        Pattern.CASE_INSENSITIVE);

/**
 * Checks page content for a client-side redirect, either by a meta refresh tag or by a script
 * assigning to {@code window.location} / {@code location.href}.
 *
 * @param context
 *            URL the content was loaded from; used to resolve relative redirect targets
 * @param content
 *            (beginning of the) page content to inspect
 * @return the redirect target resolved against {@code context}, or {@code null} if no redirect was
 *         found
 * @throws MalformedURLException
 *             if a redirect target was found but cannot be turned into a URL
 */
public static URL testRedirect(final URL context, final String content) throws MalformedURLException
{
    // check for redirect by http-equiv meta tag header
    final Matcher mHttpEquiv = P_REDIRECT_HTTP_EQUIV.matcher(content);
    if (mHttpEquiv.find())
        return new URL(context, mHttpEquiv.group(1));

    // check for redirect by window.location javascript
    final Matcher mScript = P_REDIRECT_SCRIPT.matcher(content);
    if (mScript.find())
        return new URL(context, mScript.group(1));

    return null;
}
// known "session expired" messages, as the complete text content between two tags
private static final Pattern P_EXPIRED = Pattern
        .compile(">\\s*(Your session has expired\\.|Session Expired|Ihre Verbindungskennung ist nicht mehr g.ltig\\.)\\s*<");

/**
 * Tells whether the page content contains one of the known "session expired" messages.
 *
 * @param content
 *            (beginning of the) page content to inspect
 * @return {@code true} if an expiry message was found
 */
public static boolean testExpired(final String content)
{
    return P_EXPIRED.matcher(content).find();
}
// known server-side error messages, as the complete text content between two tags
private static final Pattern P_INTERNAL_ERROR = Pattern
        .compile(">\\s*(Internal Error|Server ein Fehler aufgetreten|Internal error in gateway|VRN - Keine Verbindung zum Server m.glich)\\s*<");

/**
 * Tells whether the page content contains one of the known internal error messages.
 *
 * @param content
 *            (beginning of the) page content to inspect
 * @return {@code true} if an internal error message was found
 */
public static boolean testInternalError(final String content)
{
    return P_INTERNAL_ERROR.matcher(content).find();
}
}

View file

@ -17,341 +17,23 @@
package de.schildbach.pte.util; package de.schildbach.pte.util;
import java.io.BufferedInputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.Reader; import java.io.Reader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.HttpCookie;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLDecoder; import java.net.URLDecoder;
import java.net.URLEncoder; import java.net.URLEncoder;
import java.nio.charset.Charset; import java.nio.charset.Charset;
import java.util.Calendar; import java.util.Calendar;
import java.util.Date; import java.util.Date;
import java.util.GregorianCalendar; import java.util.GregorianCalendar;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher; import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
import java.util.zip.GZIPInputStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Charsets;
import de.schildbach.pte.exception.BlockedException;
import de.schildbach.pte.exception.InternalErrorException;
import de.schildbach.pte.exception.NotFoundException;
import de.schildbach.pte.exception.SessionExpiredException;
import de.schildbach.pte.exception.UnexpectedRedirectException;
/** /**
* @author Andreas Schildbach * @author Andreas Schildbach
*/ */
public final class ParserUtils public final class ParserUtils
{ {
private static final String SCRAPE_USER_AGENT = "Mozilla/5.0 (Linux; Android 4.4.4; Nexus 7 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.93 Safari/537.36";
private static final String SCRAPE_ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8";
public static final int SCRAPE_INITIAL_CAPACITY = 4096;
private static final int SCRAPE_COPY_SIZE = 2048;
private static final int SCRAPE_PEEK_SIZE = 4096;
private static final int SCRAPE_CONNECT_TIMEOUT = 5000;
private static final int SCRAPE_READ_TIMEOUT = 15000;
private static HttpCookie sessionCookie;
private static final Logger log = LoggerFactory.getLogger(ParserUtils.class);
public static final CharSequence scrape(final String url) throws IOException
{
return scrape(url, null);
}
public static final CharSequence scrape(final String url, final String authorization) throws IOException
{
return scrape(url, null, null, null, authorization);
}
public static final CharSequence scrape(final String url, final String postRequest, final Charset encoding) throws IOException
{
return scrape(url, postRequest, encoding, null);
}
public static final CharSequence scrape(final String urlStr, final String postRequest, final Charset requestEncoding,
final String sessionCookieName) throws IOException
{
return scrape(urlStr, postRequest, requestEncoding, sessionCookieName, null);
}
private static final CharSequence scrape(final String urlStr, final String postRequest, Charset requestEncoding, final String sessionCookieName,
final String authorization) throws IOException
{
if (requestEncoding == null)
requestEncoding = Charsets.ISO_8859_1;
final StringBuilder buffer = new StringBuilder(SCRAPE_INITIAL_CAPACITY);
final InputStream is = scrapeInputStream(urlStr, postRequest, requestEncoding, null, sessionCookieName, authorization);
final Reader pageReader = new InputStreamReader(is, requestEncoding);
copy(pageReader, buffer);
pageReader.close();
return buffer;
}
public static final long copy(final Reader reader, final StringBuilder builder) throws IOException
{
final char[] buffer = new char[SCRAPE_COPY_SIZE];
long count = 0;
int n = 0;
while (-1 != (n = reader.read(buffer)))
{
builder.append(buffer, 0, n);
count += n;
}
return count;
}
public static final InputStream scrapeInputStream(final String url) throws IOException
{
return scrapeInputStream(url, null);
}
public static final InputStream scrapeInputStream(final String url, final String sessionCookieName) throws IOException
{
return scrapeInputStream(url, null, null, null, sessionCookieName);
}
public static final InputStream scrapeInputStream(final String urlStr, final String postRequest, final Charset requestEncoding,
final String referer, final String sessionCookieName) throws IOException
{
return scrapeInputStream(urlStr, postRequest, requestEncoding, referer, sessionCookieName, null);
}
public static final InputStream scrapeInputStream(final String urlStr, final String postRequest, Charset requestEncoding, final String referer,
final String sessionCookieName, final String authorization) throws IOException
{
log.debug("{}: {}", postRequest != null ? "POST" : "GET", urlStr);
if (requestEncoding == null)
requestEncoding = Charsets.ISO_8859_1;
int tries = 3;
while (true)
{
final URL url = new URL(urlStr);
final HttpURLConnection connection = (HttpURLConnection) url.openConnection();
connection.setDoInput(true);
connection.setDoOutput(postRequest != null);
connection.setConnectTimeout(SCRAPE_CONNECT_TIMEOUT);
connection.setReadTimeout(SCRAPE_READ_TIMEOUT);
connection.addRequestProperty("User-Agent", SCRAPE_USER_AGENT);
connection.addRequestProperty("Accept", SCRAPE_ACCEPT);
connection.addRequestProperty("Accept-Encoding", "gzip");
// workaround to disable Vodafone compression
connection.addRequestProperty("Cache-Control", "no-cache");
if (referer != null)
connection.addRequestProperty("Referer", referer);
if (sessionCookie != null && sessionCookie.getName().equals(sessionCookieName))
connection.addRequestProperty("Cookie", sessionCookie.toString());
// Set authorization.
if (authorization != null)
connection.addRequestProperty("Authorization", authorization);
if (postRequest != null)
{
final byte[] postRequestBytes = postRequest.getBytes(requestEncoding.name());
connection.setRequestMethod("POST");
connection.addRequestProperty("Content-Type", "application/x-www-form-urlencoded");
connection.addRequestProperty("Content-Length", Integer.toString(postRequestBytes.length));
final OutputStream os = connection.getOutputStream();
os.write(postRequestBytes);
os.close();
}
final int responseCode = connection.getResponseCode();
if (responseCode == HttpURLConnection.HTTP_OK)
{
final String contentType = connection.getContentType();
final String contentEncoding = connection.getContentEncoding();
InputStream is = new BufferedInputStream(connection.getInputStream());
if ("gzip".equalsIgnoreCase(contentEncoding) || "application/octet-stream".equalsIgnoreCase(contentType))
is = wrapGzip(is);
if (!url.getHost().equals(connection.getURL().getHost()))
throw new UnexpectedRedirectException(url, connection.getURL());
final String firstChars = peekFirstChars(is);
final URL redirectUrl = testRedirect(url, firstChars);
if (redirectUrl != null)
throw new UnexpectedRedirectException(url, redirectUrl);
if (testExpired(firstChars))
throw new SessionExpiredException();
if (testInternalError(firstChars))
throw new InternalErrorException(url, new InputStreamReader(is, requestEncoding));
// save cookie
if (sessionCookieName != null)
{
c: for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet())
{
if ("set-cookie".equalsIgnoreCase(entry.getKey()) || "set-cookie2".equalsIgnoreCase(entry.getKey()))
{
for (final String value : entry.getValue())
{
for (final HttpCookie cookie : HttpCookie.parse(value))
{
if (cookie.getName().equals(sessionCookieName))
{
sessionCookie = cookie;
break c;
}
}
}
}
}
}
return is;
}
else if (responseCode == HttpURLConnection.HTTP_BAD_REQUEST || responseCode == HttpURLConnection.HTTP_UNAUTHORIZED
|| responseCode == HttpURLConnection.HTTP_FORBIDDEN || responseCode == HttpURLConnection.HTTP_NOT_ACCEPTABLE
|| responseCode == HttpURLConnection.HTTP_UNAVAILABLE)
{
throw new BlockedException(url, new InputStreamReader(connection.getErrorStream(), requestEncoding));
}
else if (responseCode == HttpURLConnection.HTTP_NOT_FOUND)
{
throw new NotFoundException(url, new InputStreamReader(connection.getErrorStream(), requestEncoding));
}
else if (responseCode == HttpURLConnection.HTTP_MOVED_PERM || responseCode == HttpURLConnection.HTTP_MOVED_TEMP)
{
throw new UnexpectedRedirectException(url, connection.getURL());
}
else if (responseCode == HttpURLConnection.HTTP_INTERNAL_ERROR)
{
throw new InternalErrorException(url, new InputStreamReader(connection.getErrorStream(), requestEncoding));
}
else
{
final String message = "got response: " + responseCode + " " + connection.getResponseMessage();
if (tries-- > 0)
log.info("{}, retrying...", message);
else
throw new IOException(message + ": " + url);
}
}
}
private static InputStream wrapGzip(final InputStream is) throws IOException
{
is.mark(2);
final int byte0 = is.read();
final int byte1 = is.read();
is.reset();
// check for gzip header
if (byte0 == 0x1f && byte1 == 0x8b)
{
final BufferedInputStream is2 = new BufferedInputStream(new GZIPInputStream(is));
is2.mark(2);
final int byte0_2 = is2.read();
final int byte1_2 = is2.read();
is2.reset();
// check for gzip header again
if (byte0_2 == 0x1f && byte1_2 == 0x8b)
{
// double gzipped
return new BufferedInputStream(new GZIPInputStream(is2));
}
else
{
// gzipped
return is2;
}
}
else
{
// uncompressed
return is;
}
}
public static String peekFirstChars(final InputStream is) throws IOException
{
is.mark(SCRAPE_PEEK_SIZE);
final byte[] firstBytes = new byte[SCRAPE_PEEK_SIZE];
final int read = is.read(firstBytes);
if (read == -1)
return "";
is.reset();
return new String(firstBytes, 0, read).replaceAll("\\p{C}", "");
}
private static final Pattern P_REDIRECT_HTTP_EQUIV = Pattern.compile("<META\\s+http-equiv=\"?refresh\"?\\s+content=\"\\d+;\\s*URL=([^\"]+)\"",
Pattern.CASE_INSENSITIVE);
private static final Pattern P_REDIRECT_SCRIPT = Pattern.compile(
"<script\\s+(?:type=\"text/javascript\"|language=\"javascript\")>\\s*(?:window.location|location.href)\\s*=\\s*\"([^\"]+)\"",
Pattern.CASE_INSENSITIVE);
public static URL testRedirect(final URL context, final String content) throws MalformedURLException
{
// check for redirect by http-equiv meta tag header
final Matcher mHttpEquiv = P_REDIRECT_HTTP_EQUIV.matcher(content);
if (mHttpEquiv.find())
return new URL(context, mHttpEquiv.group(1));
// check for redirect by window.location javascript
final Matcher mScript = P_REDIRECT_SCRIPT.matcher(content);
if (mScript.find())
return new URL(context, mScript.group(1));
return null;
}
private static final Pattern P_EXPIRED = Pattern
.compile(">\\s*(Your session has expired\\.|Session Expired|Ihre Verbindungskennung ist nicht mehr g.ltig\\.)\\s*<");
public static boolean testExpired(final String content)
{
// check for expired session
final Matcher mSessionExpired = P_EXPIRED.matcher(content);
if (mSessionExpired.find())
return true;
return false;
}
private static final Pattern P_INTERNAL_ERROR = Pattern
.compile(">\\s*(Internal Error|Server ein Fehler aufgetreten|Internal error in gateway|VRN - Keine Verbindung zum Server m.glich)\\s*<");
public static boolean testInternalError(final String content)
{
// check for internal error
final Matcher m = P_INTERNAL_ERROR.matcher(content);
if (m.find())
return true;
return false;
}
private static final Pattern P_HTML_UNORDERED_LIST = Pattern.compile("<ul>(.*?)</ul>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); private static final Pattern P_HTML_UNORDERED_LIST = Pattern.compile("<ul>(.*?)</ul>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
private static final Pattern P_HTML_LIST_ITEM = Pattern.compile("<li>(.*?)</li>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); private static final Pattern P_HTML_LIST_ITEM = Pattern.compile("<li>(.*?)</li>", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
private static final Pattern P_HTML_BREAKS = Pattern.compile("(<br\\s*/>)+", Pattern.DOTALL | Pattern.CASE_INSENSITIVE); private static final Pattern P_HTML_BREAKS = Pattern.compile("(<br\\s*/>)+", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);

View file

@ -29,7 +29,7 @@ import org.junit.Test;
/** /**
* @author Andreas Schildbach * @author Andreas Schildbach
*/ */
public class ParserUtilsTest public class HttpClientTest
{ {
private URL context; private URL context;
@ -42,7 +42,7 @@ public class ParserUtilsTest
@Test @Test
public void vodafoneRedirect() throws Exception public void vodafoneRedirect() throws Exception
{ {
final URL url = ParserUtils final URL url = HttpClient
.testRedirect( .testRedirect(
context, context,
"<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.1//EN \" \"http://www.openmobilealliance.org/tech/DTD/xhtml-mobile11.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\"; xml:lang=\"en\"><head><title>Vodafone Center</title><meta http-equiv=\"Cache-Control\" content=\"no-cache\"/><meta http-equiv=\"refresh\" content=\"1;URL=https://center.vodafone.de/vfcenter/index.html?targetUrl=http%3A%2F%2Fwww.fahrinfo-berlin.de/Fahrinfo/bin/query.bin/dn%3fstart=Suchen&REQ0JourneyStopsS0ID=A%253D1%2540L%253D9083301&REQ0JourneyStopsZ0ID=A%253D1%2540L%253D9195009&REQ0HafasSearchForw=1&REQ0JourneyDate=16.06.14&REQ0JourneyTime=16%253A32&REQ0JourneyProduct_prod_list_1=11111011&h2g-direct=11&L=vs_oeffi\"/><style type=\"text/css\">*{border:none;font-family:Arial,Helvetica,sans-serif} body{font-size:69%;line-height:140%;background-color:#F4F4F4 !important}</style></head><body><h1>Sie werden weitergeleitet ...</h1><p>Sollten Sie nicht weitergeleitet werden, klicken Sie bitte <a href=\"https://center.vodafo"); "<?xml version=\"1.0\" encoding=\"UTF-8\"?><!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.1//EN \" \"http://www.openmobilealliance.org/tech/DTD/xhtml-mobile11.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\"; xml:lang=\"en\"><head><title>Vodafone Center</title><meta http-equiv=\"Cache-Control\" content=\"no-cache\"/><meta http-equiv=\"refresh\" content=\"1;URL=https://center.vodafone.de/vfcenter/index.html?targetUrl=http%3A%2F%2Fwww.fahrinfo-berlin.de/Fahrinfo/bin/query.bin/dn%3fstart=Suchen&REQ0JourneyStopsS0ID=A%253D1%2540L%253D9083301&REQ0JourneyStopsZ0ID=A%253D1%2540L%253D9195009&REQ0HafasSearchForw=1&REQ0JourneyDate=16.06.14&REQ0JourneyTime=16%253A32&REQ0JourneyProduct_prod_list_1=11111011&h2g-direct=11&L=vs_oeffi\"/><style type=\"text/css\">*{border:none;font-family:Arial,Helvetica,sans-serif} body{font-size:69%;line-height:140%;background-color:#F4F4F4 !important}</style></head><body><h1>Sie werden weitergeleitet 
...</h1><p>Sollten Sie nicht weitergeleitet werden, klicken Sie bitte <a href=\"https://center.vodafo");
@ -52,7 +52,7 @@ public class ParserUtilsTest
public void kabelDeutschlandRedirect() throws Exception public void kabelDeutschlandRedirect() throws Exception
{ {
final URL url = ParserUtils final URL url = HttpClient
.testRedirect( .testRedirect(
context, context,
"<script type=\"text/javascript\"> window.location = \"http://www.hotspot.kabeldeutschland.de/portal/?RequestedURI=http%3A%2F%2Fwww.fahrinfo-berlin.de%2FFahrinfo%2Fbin%2Fajax-getstop.bin%2Fdny%3Fgetstop%3D1%26REQ0JourneyStopsS0A%3D255%26REQ0JourneyStopsS0G%3Dgneisenustra%25DFe%3F%26js%3Dtrue&RedirectReason=Policy&RedirectAqpId=100&DiscardAqpId=100&SubscriberId=4fa432d4a653e5f8b2acb27aa862f98d&SubscriberType=ESM&ClientIP=10.136.25.241&SystemId=10.143.181.2-1%2F2&GroupId=1&PartitionId=2&Application=Unknown&ApplicationGroup=Unknown\" </script>"); "<script type=\"text/javascript\"> window.location = \"http://www.hotspot.kabeldeutschland.de/portal/?RequestedURI=http%3A%2F%2Fwww.fahrinfo-berlin.de%2FFahrinfo%2Fbin%2Fajax-getstop.bin%2Fdny%3Fgetstop%3D1%26REQ0JourneyStopsS0A%3D255%26REQ0JourneyStopsS0G%3Dgneisenustra%25DFe%3F%26js%3Dtrue&RedirectReason=Policy&RedirectAqpId=100&DiscardAqpId=100&SubscriberId=4fa432d4a653e5f8b2acb27aa862f98d&SubscriberType=ESM&ClientIP=10.136.25.241&SystemId=10.143.181.2-1%2F2&GroupId=1&PartitionId=2&Application=Unknown&ApplicationGroup=Unknown\" </script>");
@ -63,7 +63,7 @@ public class ParserUtilsTest
@Test @Test
public void tplinkRedirect() throws Exception public void tplinkRedirect() throws Exception
{ {
final URL url = ParserUtils.testRedirect(context, final URL url = HttpClient.testRedirect(context,
"<body><script language=\"javaScript\">location.href=\"http://tplinkextender.net/\";</script></body></html>"); "<body><script language=\"javaScript\">location.href=\"http://tplinkextender.net/\";</script></body></html>");
assertNotNull(url); assertNotNull(url);
assertEquals("tplinkextender.net", url.getHost()); assertEquals("tplinkextender.net", url.getHost());
@ -72,7 +72,7 @@ public class ParserUtilsTest
@Test @Test
public void mshtmlRedirect() throws Exception public void mshtmlRedirect() throws Exception
{ {
final URL url = ParserUtils final URL url = HttpClient
.testRedirect( .testRedirect(
context, context,
"<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\"><HEAD><TITLE>HTML Redirection</TITLE><META http-equiv=Content-Type content=\"text/html; \"><META http-equiv=Refresh content=\"0;URL=/cgi-bin/index.cgi\"><META content=\"MSHTML 6.00.2900.2873\" name=GENERATOR></HEAD><BODY > <NOSCRIPT> If your browser can not redirect you to home page automatically.<br> Please click <a href=/cgi-bin/welcome.cgi?lang=0>here</a>. </NOSCRIPT></BODY></HTML>"); "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\"><html xmlns=\"http://www.w3.org/1999/xhtml\"><HEAD><TITLE>HTML Redirection</TITLE><META http-equiv=Content-Type content=\"text/html; \"><META http-equiv=Refresh content=\"0;URL=/cgi-bin/index.cgi\"><META content=\"MSHTML 6.00.2900.2873\" name=GENERATOR></HEAD><BODY > <NOSCRIPT> If your browser can not redirect you to home page automatically.<br> Please click <a href=/cgi-bin/welcome.cgi?lang=0>here</a>. </NOSCRIPT></BODY></HTML>");
@ -83,41 +83,41 @@ public class ParserUtilsTest
@Test @Test
public void efaExpired() throws Exception public void efaExpired() throws Exception
{ {
assertTrue(ParserUtils assertTrue(HttpClient
.testExpired("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; \"/><meta http-equiv=\"Expires\" content=\"0\"/><title>Efa9 Internal Error</title><style>.BOLD {font: bold large Arial;}.NORMAL {font: normal x-small Arial;}</style></head><body><div class=\"BOLD\">Internal Error</div><div class=\"NORMAL\">Your session has expired.</div><!--<p>&nbsp;</p><div class=\"NORMAL\">.\\EfaHttpServer.cpp</div><div class=\"NORMAL\">Line: 2043</div>--></body></html>")); .testExpired("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; \"/><meta http-equiv=\"Expires\" content=\"0\"/><title>Efa9 Internal Error</title><style>.BOLD {font: bold large Arial;}.NORMAL {font: normal x-small Arial;}</style></head><body><div class=\"BOLD\">Internal Error</div><div class=\"NORMAL\">Your session has expired.</div><!--<p>&nbsp;</p><div class=\"NORMAL\">.\\EfaHttpServer.cpp</div><div class=\"NORMAL\">Line: 2043</div>--></body></html>"));
} }
@Test @Test
public void tflExpired() throws Exception public void tflExpired() throws Exception
{ {
assertTrue(ParserUtils assertTrue(HttpClient
.testExpired("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><title>Session Expired</title><style type=\"text/css\">body{ font-family:Verdana, Arial, Helvetica, sans-serif}</style></head><body bgcolor=\"#FFFFFF\" leftmargin=\"0\" topmargin=\"0\" rightmargin=\"0\" bottommargin=\"0\" marginwidth=\"0\" marginheight=\"0\"><!--Logo--><table width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><tr> <td width=\"100%\" height=\"40\" valign=\"top\" class=\"fenster\"><table width=\"389\" height=\"40\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><tr> <td width=\"93\" valign=\"top\"><span>&nbsp;</span></td><td width=\"296\" valign=\"top\"><img src=\"images/logo.gif\" alt=\"\" width=\"372\" height=\"86\" border=\"0\"></td></tr></table></td></tr></table><!--/ Logo--><!--Content--><span><!--Headline--><table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><tr><td width=\"18\" valign=\"top\"><span>&nbsp;</span></td><td width=\"566\" valign=\"top\"><span class=\"headline\"><b>Session Expire")); .testExpired("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><title>Session Expired</title><style type=\"text/css\">body{ font-family:Verdana, Arial, Helvetica, sans-serif}</style></head><body bgcolor=\"#FFFFFF\" leftmargin=\"0\" topmargin=\"0\" rightmargin=\"0\" bottommargin=\"0\" marginwidth=\"0\" marginheight=\"0\"><!--Logo--><table width=\"100%\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><tr> <td width=\"100%\" height=\"40\" valign=\"top\" class=\"fenster\"><table width=\"389\" height=\"40\" cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><tr> <td width=\"93\" valign=\"top\"><span>&nbsp;</span></td><td width=\"296\" valign=\"top\"><img src=\"images/logo.gif\" alt=\"\" width=\"372\" height=\"86\" border=\"0\"></td></tr></table></td></tr></table><!--/ Logo--><!--Content--><span><!--Headline--><table cellspacing=\"0\" cellpadding=\"0\" border=\"0\"><tr><td width=\"18\" 
valign=\"top\"><span>&nbsp;</span></td><td width=\"566\" valign=\"top\"><span class=\"headline\"><b>Session Expire"));
} }
@Test @Test
public void nvbwExpired() throws Exception public void nvbwExpired() throws Exception
{ {
assertTrue(ParserUtils.testExpired("<h2>Ihre Verbindungskennung ist nicht mehr gültig.</h2>")); assertTrue(HttpClient.testExpired("<h2>Ihre Verbindungskennung ist nicht mehr gültig.</h2>"));
} }
@Test @Test
public void internalError() throws Exception public void internalError() throws Exception
{ {
assertTrue(ParserUtils assertTrue(HttpClient
.testInternalError("<?xml version=\"1.0\"?> <!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\"> <html xmlns=\"http://www.w3.org/1999/xhtml\"> <head> <title> Internal error in gateway </title> </head> <body> <h1> Internal error in gateway </h1> </body> </html>")); .testInternalError("<?xml version=\"1.0\"?> <!DOCTYPE html PUBLIC \"-//WAPFORUM//DTD XHTML Mobile 1.0//EN\" \"http://www.wapforum.org/DTD/xhtml-mobile10.dtd\"> <html xmlns=\"http://www.w3.org/1999/xhtml\"> <head> <title> Internal error in gateway </title> </head> <body> <h1> Internal error in gateway </h1> </body> </html>"));
} }
@Test @Test
public void vgnInternalError() throws Exception public void vgnInternalError() throws Exception
{ {
assertTrue(ParserUtils assertTrue(HttpClient
.testInternalError("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\"/><meta http-equiv=\"Expires\" content=\"0\"/><title>Efa9 Internal Error</title></head><body><div style=\"font: bold large Arial;\">Internal Error</div><div style=\"font: normal x-small Arial;\">.\\EfaHttpServer.cpp</div><div style=\"font: normal x-small Arial;\">Line: 2507</div></body></html>")); .testInternalError("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=ISO-8859-1\"/><meta http-equiv=\"Expires\" content=\"0\"/><title>Efa9 Internal Error</title></head><body><div style=\"font: bold large Arial;\">Internal Error</div><div style=\"font: normal x-small Arial;\">.\\EfaHttpServer.cpp</div><div style=\"font: normal x-small Arial;\">Line: 2507</div></body></html>"));
} }
@Test @Test
public void vrnInternalError() throws Exception public void vrnInternalError() throws Exception
{ {
assertTrue(ParserUtils assertTrue(HttpClient
.testInternalError("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><title>VRN - Keine Verbindung zum Server möglich</title></head><body><center><table border=\"0\" width=\"450\" cellpadding=\"5\"><tr><td height=\"50\">&nbsp;</td></tr><tr><td align=\"center\"><img src=\"/vrn/ExceptionFiles/cookies.jpg\"></td></tr></table></center></body></html>")); .testInternalError("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"><html><head><title>VRN - Keine Verbindung zum Server möglich</title></head><body><center><table border=\"0\" width=\"450\" cellpadding=\"5\"><tr><td height=\"50\">&nbsp;</td></tr><tr><td align=\"center\"><img src=\"/vrn/ExceptionFiles/cookies.jpg\"></td></tr></table></center></body></html>"));
} }
} }