mirror of
https://gitlab.com/oeffi/public-transport-enabler.git
synced 2025-07-14 16:40:30 +00:00
Make sure scraped input streams are always buffered, so they can be peeked into.
This commit is contained in:
parent
8bcdc7aa14
commit
7fcaf904dd
2 changed files with 47 additions and 71 deletions
|
@ -17,7 +17,6 @@
|
|||
|
||||
package de.schildbach.pte;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
@ -2217,7 +2216,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
|
|||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = new BufferedInputStream(ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_", 3));
|
||||
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_", 3);
|
||||
is.mark(512);
|
||||
|
||||
return queryTrips(uri.toString(), is);
|
||||
|
@ -2263,7 +2262,7 @@ public abstract class AbstractEfaProvider extends AbstractNetworkProvider
|
|||
InputStream is = null;
|
||||
try
|
||||
{
|
||||
is = new BufferedInputStream(ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_", 3));
|
||||
is = ParserUtils.scrapeInputStream(uri.toString(), null, null, httpRefererTrip, "NSC_", 3);
|
||||
is.mark(512);
|
||||
|
||||
return queryTripsMobile(uri.toString(), null, null, null, is);
|
||||
|
|
|
@ -125,32 +125,10 @@ public final class ParserUtils
|
|||
if (!url.getHost().equals(connection.getURL().getHost()))
|
||||
throw new UnexpectedRedirectException(url, connection.getURL());
|
||||
|
||||
final InputStream is;
|
||||
if ("gzip".equalsIgnoreCase(contentEncoding) || "application/octet-stream".equalsIgnoreCase(contentType))
|
||||
{
|
||||
final BufferedInputStream bis = new BufferedInputStream(connection.getInputStream());
|
||||
bis.mark(2);
|
||||
final int byte0 = bis.read();
|
||||
final int byte1 = bis.read();
|
||||
bis.reset();
|
||||
InputStream is = new BufferedInputStream(connection.getInputStream());
|
||||
|
||||
// check for gzip header
|
||||
if (byte0 == 0x1f && byte1 == 0x8b)
|
||||
{
|
||||
// gzipped
|
||||
is = new GZIPInputStream(bis);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uncompressed
|
||||
is = bis;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// uncompressed
|
||||
is = connection.getInputStream();
|
||||
}
|
||||
if ("gzip".equalsIgnoreCase(contentEncoding) || "application/octet-stream".equalsIgnoreCase(contentType))
|
||||
is = wrapGzip(is);
|
||||
|
||||
final Reader pageReader = new InputStreamReader(is, encoding);
|
||||
copy(pageReader, buffer);
|
||||
|
@ -293,7 +271,12 @@ public final class ParserUtils
|
|||
{
|
||||
final String contentType = connection.getContentType();
|
||||
final String contentEncoding = connection.getContentEncoding();
|
||||
final InputStream is = connection.getInputStream();
|
||||
|
||||
InputStream is = new BufferedInputStream(connection.getInputStream());
|
||||
|
||||
if ("gzip".equalsIgnoreCase(contentEncoding) || "application/octet-stream".equalsIgnoreCase(contentType))
|
||||
is = wrapGzip(is);
|
||||
|
||||
if (!url.getHost().equals(connection.getURL().getHost()))
|
||||
throw new UnexpectedRedirectException(url, connection.getURL());
|
||||
|
||||
|
@ -314,48 +297,7 @@ public final class ParserUtils
|
|||
}
|
||||
}
|
||||
|
||||
if ("gzip".equalsIgnoreCase(contentEncoding) || "application/octet-stream".equalsIgnoreCase(contentType))
|
||||
{
|
||||
final BufferedInputStream bis = new BufferedInputStream(is);
|
||||
bis.mark(2);
|
||||
final int byte0 = bis.read();
|
||||
final int byte1 = bis.read();
|
||||
bis.reset();
|
||||
|
||||
// check for gzip header
|
||||
if (byte0 == 0x1f && byte1 == 0x8b)
|
||||
{
|
||||
final InputStream gis = new GZIPInputStream(bis);
|
||||
|
||||
final BufferedInputStream bis2 = new BufferedInputStream(gis);
|
||||
bis2.mark(2);
|
||||
final int byte0_2 = bis2.read();
|
||||
final int byte1_2 = bis2.read();
|
||||
bis2.reset();
|
||||
|
||||
// check for gzip header again
|
||||
if (byte0_2 == 0x1f && byte1_2 == 0x8b)
|
||||
{
|
||||
// double gzipped
|
||||
return new GZIPInputStream(bis2);
|
||||
}
|
||||
else
|
||||
{
|
||||
// gzipped
|
||||
return bis2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// uncompressed
|
||||
return bis;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// uncompressed
|
||||
return is;
|
||||
}
|
||||
return is;
|
||||
}
|
||||
else if (responseCode == HttpURLConnection.HTTP_FORBIDDEN || responseCode == HttpURLConnection.HTTP_BAD_REQUEST
|
||||
|| responseCode == HttpURLConnection.HTTP_NOT_ACCEPTABLE || responseCode == HttpURLConnection.HTTP_UNAVAILABLE)
|
||||
|
@ -385,6 +327,41 @@ public final class ParserUtils
|
|||
}
|
||||
}
|
||||
|
||||
private static InputStream wrapGzip(final InputStream is) throws IOException
|
||||
{
|
||||
is.mark(2);
|
||||
final int byte0 = is.read();
|
||||
final int byte1 = is.read();
|
||||
is.reset();
|
||||
|
||||
// check for gzip header
|
||||
if (byte0 == 0x1f && byte1 == 0x8b)
|
||||
{
|
||||
final BufferedInputStream is2 = new BufferedInputStream(new GZIPInputStream(is));
|
||||
is2.mark(2);
|
||||
final int byte0_2 = is2.read();
|
||||
final int byte1_2 = is2.read();
|
||||
is2.reset();
|
||||
|
||||
// check for gzip header again
|
||||
if (byte0_2 == 0x1f && byte1_2 == 0x8b)
|
||||
{
|
||||
// double gzipped
|
||||
return new BufferedInputStream(new GZIPInputStream(is2));
|
||||
}
|
||||
else
|
||||
{
|
||||
// gzipped
|
||||
return is2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// uncompressed
|
||||
return is;
|
||||
}
|
||||
}
|
||||
|
||||
private static final Pattern P_ENTITY = Pattern.compile("&(?:#(x[\\da-f]+|\\d+)|(amp|quot|apos|szlig|nbsp));");
|
||||
|
||||
public static String resolveEntities(final CharSequence str)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue