Configure session cookies for scraping.

This commit is contained in:
Andreas Schildbach 2014-12-19 17:49:28 +01:00
parent 55a30f56de
commit d69506b7c1
4 changed files with 32 additions and 21 deletions

View file

@ -914,7 +914,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, "NSC_"); is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpReferer, null);
firstChars = ParserUtils.peekFirstChars(is); firstChars = ParserUtils.peekFirstChars(is);
final XmlPullParser pp = parserFactory.newPullParser(); final XmlPullParser pp = parserFactory.newPullParser();
@ -2173,7 +2173,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, "NSC_"); is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is); firstChars = ParserUtils.peekFirstChars(is);
return queryTrips(uri.toString(), is); return queryTrips(uri.toString(), is);
@ -2212,7 +2212,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, "NSC_"); is = ParserUtils.scrapeInputStream(uri.toString(), httpPost ? parameters.substring(1) : null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is); firstChars = ParserUtils.peekFirstChars(is);
return queryTripsMobile(uri.toString(), from, via, to, is); return queryTripsMobile(uri.toString(), from, via, to, is);
@ -2244,7 +2244,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_"); is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is); firstChars = ParserUtils.peekFirstChars(is);
return queryTrips(uri.toString(), is); return queryTrips(uri.toString(), is);
@ -2276,7 +2276,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
try try
{ {
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_"); is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is); firstChars = ParserUtils.peekFirstChars(is);
is.mark(512); is.mark(512);

View file

@ -871,7 +871,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try try
{ {
final String endpoint = extXmlEndpoint != null ? extXmlEndpoint : queryEndpoint; final String endpoint = extXmlEndpoint != null ? extXmlEndpoint : queryEndpoint;
final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, null); final InputStream is = ParserUtils.scrapeInputStream(endpoint, request, null, null, sessionCookieName);
firstChars = ParserUtils.peekFirstChars(is); firstChars = ParserUtils.peekFirstChars(is);
reader = new InputStreamReader(is, ISO_8859_1); reader = new InputStreamReader(is, ISO_8859_1);
@ -1528,7 +1528,7 @@ public abstract class AbstractHafasProvider extends AbstractNetworkProvider
try try
{ {
final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri)); final CustomBufferedInputStream bis = new CustomBufferedInputStream(ParserUtils.scrapeInputStream(uri, sessionCookieName));
final String firstChars = ParserUtils.peekFirstChars(bis); final String firstChars = ParserUtils.peekFirstChars(bis);
// initialize input stream // initialize input stream

View file

@ -25,7 +25,6 @@ import java.util.Map;
import java.util.Set; import java.util.Set;
import java.util.TimeZone; import java.util.TimeZone;
import de.schildbach.pte.NetworkProvider.Capability;
import de.schildbach.pte.dto.Point; import de.schildbach.pte.dto.Point;
import de.schildbach.pte.dto.Product; import de.schildbach.pte.dto.Product;
import de.schildbach.pte.dto.Style; import de.schildbach.pte.dto.Style;
@ -42,6 +41,7 @@ public abstract class AbstractNetworkProvider implements NetworkProvider
protected TimeZone timeZone = TimeZone.getTimeZone("CET"); protected TimeZone timeZone = TimeZone.getTimeZone("CET");
protected int numTripsRequested = 6; protected int numTripsRequested = 6;
private Map<String, Style> styles = null; private Map<String, Style> styles = null;
protected String sessionCookieName = null;
static static
{ {
@ -80,6 +80,11 @@ public abstract class AbstractNetworkProvider implements NetworkProvider
this.styles = styles; this.styles = styles;
} }
/**
 * Sets the name of the session cookie used by this provider.
 * <p>
 * The value is stored in the {@code sessionCookieName} field, which the provider implementations hand to
 * {@code ParserUtils.scrapeInputStream(...)} when issuing requests.
 *
 * @param sessionCookieName
 *            cookie name to store; the field holds {@code null} until a name is assigned
 */
protected void setSessionCookieName(final String sessionCookieName)
{
this.sessionCookieName = sessionCookieName;
}
private static final char STYLES_SEP = '|'; private static final char STYLES_SEP = '|';
public Style lineStyle(final String network, final String line) public Style lineStyle(final String network, final String line)

View file

@ -24,6 +24,7 @@ import java.io.InputStreamReader;
import java.io.OutputStream; import java.io.OutputStream;
import java.io.Reader; import java.io.Reader;
import java.io.UnsupportedEncodingException; import java.io.UnsupportedEncodingException;
import java.net.HttpCookie;
import java.net.HttpURLConnection; import java.net.HttpURLConnection;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import java.net.URL; import java.net.URL;
@ -59,12 +60,7 @@ public final class ParserUtils
private static final int SCRAPE_READ_TIMEOUT = 15000; private static final int SCRAPE_READ_TIMEOUT = 15000;
private static final Charset SCRAPE_DEFAULT_ENCODING = Charset.forName("ISO-8859-1"); private static final Charset SCRAPE_DEFAULT_ENCODING = Charset.forName("ISO-8859-1");
private static String stateCookie; private static HttpCookie sessionCookie;
/**
 * Discards the remembered state cookie by resetting the static {@code stateCookie} field to {@code null}.
 */
public static void resetState()
{
stateCookie = null;
}
public static final CharSequence scrape(final String url) throws IOException public static final CharSequence scrape(final String url) throws IOException
{ {
@ -116,7 +112,12 @@ public final class ParserUtils
public static final InputStream scrapeInputStream(final String url) throws IOException public static final InputStream scrapeInputStream(final String url) throws IOException
{ {
return scrapeInputStream(url, null, null, null, null); return scrapeInputStream(url, null);
}
public static final InputStream scrapeInputStream(final String url, final String sessionCookieName) throws IOException
{
return scrapeInputStream(url, null, null, null, sessionCookieName);
} }
public static final InputStream scrapeInputStream(final String urlStr, final String postRequest, final Charset requestEncoding, public static final InputStream scrapeInputStream(final String urlStr, final String postRequest, final Charset requestEncoding,
@ -151,8 +152,8 @@ public final class ParserUtils
if (referer != null) if (referer != null)
connection.addRequestProperty("Referer", referer); connection.addRequestProperty("Referer", referer);
if (sessionCookieName != null && stateCookie != null) if (sessionCookie != null && sessionCookie.getName().equals(sessionCookieName))
connection.addRequestProperty("Cookie", stateCookie); connection.addRequestProperty("Cookie", sessionCookie.toString());
// Set authorization. // Set authorization.
if (authorization != null) if (authorization != null)
@ -197,17 +198,22 @@ public final class ParserUtils
if (testInternalError(firstChars)) if (testInternalError(firstChars))
throw new InternalErrorException(url, new InputStreamReader(is, requestEncoding)); throw new InternalErrorException(url, new InputStreamReader(is, requestEncoding));
// save cookie
if (sessionCookieName != null) if (sessionCookieName != null)
{ {
for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet()) c: for (final Map.Entry<String, List<String>> entry : connection.getHeaderFields().entrySet())
{ {
if ("set-cookie".equalsIgnoreCase(entry.getKey())) if ("set-cookie".equalsIgnoreCase(entry.getKey()) || "set-cookie2".equalsIgnoreCase(entry.getKey()))
{ {
for (final String value : entry.getValue()) for (final String value : entry.getValue())
{ {
if (value.startsWith(sessionCookieName)) for (final HttpCookie cookie : HttpCookie.parse(value))
{ {
stateCookie = value.split(";", 2)[0]; if (cookie.getName().equals(sessionCookieName))
{
sessionCookie = cookie;
break c;
}
} }
} }
} }