itace 发表于 2013-2-3 10:33:25

Java 获取网页的编码方式

/**
 * Determines the character encoding (charset) of the page behind an HTTP connection.
 *
 * <p>The charset is taken from the {@code charset} parameter of the {@code Content-Type}
 * response header when present. Otherwise the response body is scanned for an in-document
 * declaration via {@code getEncodingByInputStream}.
 *
 * <p>The {@code Content-Encoding} header is deliberately NOT returned as the result: it names
 * the transfer compression (for example {@code gzip}), not the character set. It is only
 * consulted to decompress a gzip body before scanning it.
 *
 * <p>Note: when the body has to be scanned, bytes are consumed from the connection's input
 * stream; the stream is left open because it belongs to the connection.
 *
 * @param httpUrlConnection the connection to inspect
 * @return the upper-cased charset name
 * @throws IOException if the response headers or body cannot be read
 */
public static String getEncoding(HttpURLConnection httpUrlConnection) throws IOException{
    String contentType = httpUrlConnection.getContentType();
    if (contentType != null) {
        // Content-Type looks like: text/html; charset=UTF-8; other=param
        String[] parts = contentType.split(";");
        for (String part : parts) {
            int eq = part.indexOf('=');
            if (eq == -1) {
                continue;
            }
            // Only the "charset" parameter is relevant; e.g. "boundary=..." must be ignored.
            if (!part.substring(0, eq).trim().equalsIgnoreCase("charset")) {
                continue;
            }
            String value = part.substring(eq + 1).trim();
            // The value may legally be a quoted string: charset="utf-8"
            if (value.length() >= 2
                    && (value.charAt(0) == '"' || value.charAt(0) == '\'')
                    && value.charAt(value.length() - 1) == value.charAt(0)) {
                value = value.substring(1, value.length() - 1).trim();
            }
            if (value.length() > 0) {
                System.out.println("----contentType----");
                // Locale.ROOT keeps the result stable under e.g. a Turkish default locale.
                return value.toUpperCase(java.util.Locale.ROOT);
            }
        }
    }

    // No usable charset in the headers: fall back to sniffing the document itself.
    System.out.println("----InputStream----");
    InputStream body = httpUrlConnection.getInputStream();
    String contentEncoding = httpUrlConnection.getContentEncoding();
    if ("gzip".equalsIgnoreCase(contentEncoding) || "x-gzip".equalsIgnoreCase(contentEncoding)) {
        // HttpURLConnection hands back the raw compressed bytes; decompress before scanning.
        body = new java.util.zip.GZIPInputStream(body);
    }
    return getEncodingByInputStream(body);
}
/**
 * Scans a document byte by byte for a {@code charset=NAME} declaration and returns the
 * upper-cased name.
 *
 * <p>Both the classic form {@code content="text/html; charset=gb2312"} and the HTML5 form
 * {@code <meta charset="utf-8">} are recognised. The keyword is matched case-insensitively,
 * whitespace around {@code =} is tolerated, and one optional opening quote is skipped.
 * An occurrence of the word "charset" that is not followed by {@code =} is ignored and
 * scanning continues.
 *
 * <p>Bytes are interpreted one-to-one as characters, so the declaration must be written in
 * an ASCII-compatible encoding. The stream is consumed up to the declaration (or to its
 * end) and is not closed.
 *
 * @param inputStream the document bytes; may be {@code null}
 * @return the declared charset in upper case, or {@code "UTF-8"} when the stream is
 *         {@code null}, contains no declaration, or cannot be read
 */
public static String getEncodingByInputStream(InputStream inputStream){
    final String defaultEncoding = "UTF-8";
    final String keyword = "charset";
    // Upper bound on the name length so a malformed document cannot grow the buffer unbounded.
    final int maxNameLength = 64;

    if (inputStream == null) {
        return defaultEncoding;
    }
    try {
        int matched = 0;
        int current = inputStream.read();
        while (current != -1) {
            char lower = Character.toLowerCase((char) current);
            if (lower == keyword.charAt(matched)) {
                matched++;
            } else {
                // 'c' occurs only at the start of the keyword, so after a mismatch the only
                // possible partial match is a fresh one beginning at this character.
                matched = (lower == keyword.charAt(0)) ? 1 : 0;
            }
            if (matched < keyword.length()) {
                current = inputStream.read();
                continue;
            }

            // Full keyword seen; a real declaration must continue with '='.
            matched = 0;
            current = readSkippingWhitespace(inputStream);
            if (current != '=') {
                continue; // not a declaration; re-examine this character in the main loop
            }
            current = readSkippingWhitespace(inputStream);
            if (current == '"' || current == '\'') {
                current = inputStream.read();
            }

            StringBuilder name = new StringBuilder();
            while (current != -1 && isCharsetNameChar(current) && name.length() < maxNameLength) {
                name.append((char) current);
                current = inputStream.read();
            }
            if (name.length() > 0) {
                // Locale.ROOT keeps the result stable under e.g. a Turkish default locale.
                return name.toString().toUpperCase(java.util.Locale.ROOT);
            }
            // Empty value (e.g. charset=""): keep scanning from the current character.
        }
        return defaultEncoding;
    } catch (IOException e) {
        // Best effort: report the problem and fall back to the default.
        e.printStackTrace();
        return defaultEncoding;
    }
}

/** Reads and returns the next byte that is not ASCII whitespace, or -1 at end of stream. */
private static int readSkippingWhitespace(InputStream in) throws IOException {
    int c = in.read();
    while (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f') {
        c = in.read();
    }
    return c;
}

/** Tells whether the byte may appear inside a charset name (ASCII letters, digits, "-_.:+"). */
private static boolean isCharsetNameChar(int c) {
    return (c >= 'a' && c <= 'z')
            || (c >= 'A' && c <= 'Z')
            || (c >= '0' && c <= '9')
            || c == '-' || c == '_' || c == '.' || c == ':' || c == '+';
}
页: [1]
查看完整版本: java获取网页的编码方式