From d69506b7c14fe6436630c9da83215bd683da47a5 Mon Sep 17 00:00:00 2001 From: Andreas Schildbach Date: Fri, 19 Dec 2014 17:49:28 +0100 Subject: [PATCH] Configure session cookies for scraping. --- .../schildbach/pte/AbstractEfaProvider.java | 10 +++--- .../schildbach/pte/AbstractHafasProvider.java | 4 +-- .../pte/AbstractNetworkProvider.java | 7 +++- .../de/schildbach/pte/util/ParserUtils.java | 32 +++++++++++-------- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/enabler/src/de/schildbach/pte/AbstractEfaProvider.java b/enabler/src/de/schildbach/pte/AbstractEfaProvider.java index 3217c3d7..42d12c87 100644 --- a/enabler/src/de/schildbach/pte/AbstractEfaProvider.java +++ b/enabler/src/de/schildbach/pte/AbstractEfaProvider.java @@ -914,7 +914,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider try { - is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, "NSC_"); + is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null); firstChars = ParserUtils.peekFirstChars(is); final XmlPullParser pp = parserFactory.newPullParser(); @@ -2173,7 +2173,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider try { - is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, "NSC_"); + is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName); firstChars = ParserUtils.peekFirstChars(is); return queryTrips(uri.toString(), is); @@ -2212,7 +2212,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider try { - is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, "NSC_"); + is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? 
parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName); firstChars = ParserUtils.peekFirstChars(is); return queryTripsMobile(uri.toString(), from, via, to, is); @@ -2244,7 +2244,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider try { - is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_"); + is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName); firstChars = ParserUtils.peekFirstChars(is); return queryTrips(uri.toString(), is); @@ -2276,7 +2276,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider try { - is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_"); + is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName); firstChars = ParserUtils.peekFirstChars(is); is.mark(512); diff --git a/enabler/src/de/schildbach/pte/AbstractHafasProvider.java b/enabler/src/de/schildbach/pte/AbstractHafasProvider.java index 9736bd82..87741efe 100644 --- a/enabler/src/de/schildbach/pte/AbstractHafasProvider.java +++ b/enabler/src/de/schildbach/pte/AbstractHafasProvider.java @@ -871,7 +871,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider try { final String endpoint = extXmlEndpoint != null ? 
extXmlEndpoint : queryEndpoint; - final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, null); + final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, sessionCookieName); firstChars = ParserUtils.peekFirstChars(is); reader = new InputStreamReader(is, ISO_8859_1); @@ -1528,7 +1528,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider try { - final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri)); + final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri, sessionCookieName)); final String firstChars = ParserUtils.peekFirstChars(bis); // initialize input stream diff --git a/enabler/src/de/schildbach/pte/AbstractNetworkProvider.java b/enabler/src/de/schildbach/pte/AbstractNetworkProvider.java index 236b3f3b..53d169bd 100644 --- a/enabler/src/de/schildbach/pte/AbstractNetworkProvider.java +++ b/enabler/src/de/schildbach/pte/AbstractNetworkProvider.java @@ -25,7 +25,6 @@ import java.util.Map; import java.util.Set; import java.util.TimeZone; -import de.schildbach.pte.NetworkProvider.Capability; import de.schildbach.pte.dto.Point; import de.schildbach.pte.dto.Product; import de.schildbach.pte.dto.Style; @@ -42,6 +41,7 @@ public abstract class AbstractNetworkProvider implements NetworkProvider protected TimeZone timeZone = TimeZone.getTimeZone("CET"); protected int numTripsRequested = 6; private Map styles = null; + protected String sessionCookieName = null; static { @@ -80,6 +80,11 @@ public abstract class AbstractNetworkProvider implements NetworkProvider this.styles = styles; } + protected void setSessionCookieName(final String sessionCookieName) + { + this.sessionCookieName = sessionCookieName; + } + private static final char STYLES_SEP = '|'; public Style lineStyle(final String network, final String line) diff --git a/enabler/src/de/schildbach/pte/util/ParserUtils.java 
b/enabler/src/de/schildbach/pte/util/ParserUtils.java index 1446b67e..daee40f6 100644 --- a/enabler/src/de/schildbach/pte/util/ParserUtils.java +++ b/enabler/src/de/schildbach/pte/util/ParserUtils.java @@ -24,6 +24,7 @@ import java.io.InputStreamReader; import java.io.OutputStream; import java.io.Reader; import java.io.UnsupportedEncodingException; +import java.net.HttpCookie; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; @@ -59,12 +60,7 @@ public final class ParserUtils private static final int SCRAPE_READ_TIMEOUT = 15000; private static final Charset SCRAPE_DEFAULT_ENCODING = Charset.forName("ISO-8859-1"); - private static String stateCookie; - - public static void resetState() - { - stateCookie = null; - } + private static HttpCookie sessionCookie; public static final CharSequence scrape(final String url) throws IOException { @@ -116,7 +112,12 @@ public final class ParserUtils public static final InputStream scrapeInputStream(final String url) throws IOException { - return scrapeInputStream(url, null, null, null, null); + return scrapeInputStream(url, null); + } + + public static final InputStream scrapeInputStream(final String url, final String sessionCookieName) throws IOException + { + return scrapeInputStream(url, null, null, null, sessionCookieName); } public static final InputStream scrapeInputStream(final String urlStr, final String postRequest, final Charset requestEncoding, @@ -151,8 +152,8 @@ public final class ParserUtils if (referer != null) connection.addRequestProperty("Referer", referer); - if (sessionCookieName != null && stateCookie != null) - connection.addRequestProperty("Cookie", stateCookie); + if (sessionCookie != null && sessionCookie.getName().equals(sessionCookieName)) + connection.addRequestProperty("Cookie", sessionCookie.toString()); // Set authorization. 
if (authorization != null) @@ -197,17 +198,22 @@ public final class ParserUtils if (testInternalError(firstChars)) throw new InternalErrorException(url, new InputStreamReader(is, requestEncoding)); + // save cookie if (sessionCookieName != null) { - for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet()) + c: for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet()) { - if ("set-cookie".equalsIgnoreCase(entry.getKey())) + if ("set-cookie".equalsIgnoreCase(entry.getKey()) || "set-cookie2".equalsIgnoreCase(entry.getKey())) { for (final String value : entry.getValue()) { - if (value.startsWith(sessionCookieName)) + for (final HttpCookie cookie : HttpCookie.parse(value)) { - stateCookie = value.split(";", 2)[0]; + if (cookie.getName().equals(sessionCookieName)) + { + sessionCookie = cookie; + break c; + } } } }