Workaround for spurious pages with an empty (missing) content type: retry the request up to 3 times before failing with an IOException

git-svn-id: https://public-transport-enabler.googlecode.com/svn/trunk@390 0924bc21-9374-b0fa-ee44-9ff1593b38f0
This commit is contained in:
andreas.schildbach@gmail.com 2010-12-13 23:13:04 +00:00
parent d3df888509
commit c63dccc326

View file

@ -167,10 +167,12 @@ public final class ParserUtils
public static final InputStream scrapeInputStream(final String url) throws IOException public static final InputStream scrapeInputStream(final String url) throws IOException
{ {
return scrapeInputStream(url, null); return scrapeInputStream(url, null, 3);
} }
public static final InputStream scrapeInputStream(final String url, final String postRequest) throws IOException public static final InputStream scrapeInputStream(final String url, final String postRequest, int tries) throws IOException
{
while (true)
{ {
final HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection(); final HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
@ -193,8 +195,20 @@ public final class ParserUtils
writer.close(); writer.close();
} }
if (connection.getContentType() != null)
{
return connection.getInputStream(); return connection.getInputStream();
} }
else
{
final String message = "got page without content type";
if (tries-- > 0)
System.out.println(message + ", retrying...");
else
throw new IOException(message + ": " + url);
}
}
}
private static final Pattern P_ENTITY = Pattern.compile("&(?:#(x[\\da-f]+|\\d+)|(amp|quot|apos));"); private static final Pattern P_ENTITY = Pattern.compile("&(?:#(x[\\da-f]+|\\d+)|(amp|quot|apos));");