现象
使用HttpClient工具上传文件时,如果文件名是中文,文件名会乱码
文件名乱码的代码:
1
2
3
4
5
6
7
8
9
10
11
12
|
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,File fileToUpload) {
MultipartEntityBuilder builder = MultipartEntityBuilder.create();
builder.addTextBody( "scenarioId" , scenarioId.toString());
for (String groupId : groupIds) {
builder.addTextBody( "groupIds" , groupId);
}
builder.addTextBody( "extension" , extension);
builder.addPart( "fileToUpload" , new FileBody(fileToUpload));
builder.addTextBody( "type" , AssetFileTypeEnum.CSV.getName());
builder.addTextBody( "isSplit" , "false" );
builder.addTextBody( "isRefresh" , "false" );
return builder.build();
|
乱码原因:
HttpClient上传文件时,会调用doWriteTo方法向输出流写入请求体;其中调用的formatMultipartHeader方法在底层主要有3种不同的实现,这3种实现采用的字符集不一样
HttpClient中的doWriteTo方法:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
/**
 * Writes the multipart body to {@code out}. For every body part it emits
 * TWO_DASHES + boundary + CR_LF, the part headers, a CR_LF, optionally the part
 * content, and another CR_LF; afterwards it emits the closing delimiter
 * TWO_DASHES + boundary + TWO_DASHES + CR_LF.
 *
 * @param out          stream the multipart data is written to
 * @param writeContent when {@code false}, the part bodies are skipped and only the
 *                     delimiters and headers are written
 * @throws IOException declared by the signature; presumably raised by the underlying writes
 */
void doWriteTo(
final OutputStream out,
final boolean writeContent) throws IOException {
// Encode the boundary once with the form's charset and reuse it for every delimiter.
final ByteArrayBuffer boundaryEncoded = encode( this .charset, this .boundary);
for ( final FormBodyPart part: getBodyParts()) {
writeBytes(TWO_DASHES, out);
writeBytes(boundaryEncoded, out);
writeBytes(CR_LF, out);
// There are three implementations of this call, one per mode; each mode is implemented
// differently and uses a different charset for the headers.
formatMultipartHeader(part, out);
writeBytes(CR_LF, out);
if (writeContent) {
part.getBody().writeTo(out);
}
writeBytes(CR_LF, out);
}
// Closing delimiter: the boundary followed by two extra dashes.
writeBytes(TWO_DASHES, out);
writeBytes(boundaryEncoded, out);
writeBytes(TWO_DASHES, out);
writeBytes(CR_LF, out);
}
|
其中的formatMultipartHeader方法,不同的模式有不同的实现方式,具体采用哪种实现由MultipartEntityBuilder根据mode决定
MultipartEntityBuilder中的buildEntity方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
|
/**
 * Assembles the {@code MultipartFormEntity} from the builder's current state: resolves
 * the boundary and charset, derives the content type, copies the body parts and picks
 * the multipart form implementation that matches the configured mode.
 *
 * @return the entity wrapping the selected form, its content type and its total length
 */
MultipartFormEntity buildEntity() {
// Boundary resolution order: explicit boundary, then the "boundary" parameter of the
// content type, then a freshly generated one.
String boundaryCopy = boundary;
if (boundaryCopy == null && contentType != null ) {
boundaryCopy = contentType.getParameter( "boundary" );
}
if (boundaryCopy == null ) {
boundaryCopy = generateBoundary();
}
// Charset resolution order: explicit charset, then the content type's charset.
// It may still be null afterwards; no default is applied here.
Charset charsetCopy = charset;
if (charsetCopy == null && contentType != null ) {
charsetCopy = contentType.getCharset();
}
// Content-type parameters: always the boundary, plus the charset when one is known.
final List<NameValuePair> paramsList = new ArrayList<NameValuePair>( 2 );
paramsList.add( new BasicNameValuePair( "boundary" , boundaryCopy));
if (charsetCopy != null ) {
paramsList.add( new BasicNameValuePair( "charset" , charsetCopy.name()));
}
final NameValuePair[] params = paramsList.toArray( new NameValuePair[paramsList.size()]);
// Reuse the configured content type if present, otherwise create "multipart/" + DEFAULT_SUBTYPE.
final ContentType contentTypeCopy = contentType != null ?
contentType.withParameters(params) :
ContentType.create( "multipart/" + DEFAULT_SUBTYPE, params);
// Copy the parts list so the form works on its own list instance.
final List<FormBodyPart> bodyPartsCopy = bodyParts != null ? new ArrayList<FormBodyPart>(bodyParts) :
Collections.<FormBodyPart>emptyList();
// Copy mode into modeCopy, falling back to STRICT when no mode was set.
final HttpMultipartMode modeCopy = mode != null ? mode : HttpMultipartMode.STRICT;
final AbstractMultipartForm form;
// Build one of three forms depending on modeCopy; each one uses a different charset,
// which is the root cause of the garbled file name.
switch (modeCopy) {
case BROWSER_COMPATIBLE:
form = new HttpBrowserCompatibleMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);
break ;
case RFC6532:
form = new HttpRFC6532Multipart(charsetCopy, boundaryCopy, bodyPartsCopy);
break ;
default :
form = new HttpStrictMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);
}
return new MultipartFormEntity(form, contentTypeCopy, form.getTotalLength());
}
/**
 * Public entry point of the builder; delegates to {@code buildEntity()}.
 *
 * @return the entity produced by {@code buildEntity()}
 */
public HttpEntity build() {
return buildEntity();
}
|
BROWSER_COMPATIBLE模式中的formatMultipartHeader方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/**
 * Multipart form used for {@code HttpMultipartMode.BROWSER_COMPATIBLE}. It writes only
 * the Content-Disposition header (plus Content-Type when the part has a file name) and
 * encodes them with the form's own charset.
 */
class HttpBrowserCompatibleMultipart extends AbstractMultipartForm {
// Body parts exposed through getBodyParts().
private final List<FormBodyPart> parts;
public HttpBrowserCompatibleMultipart(
final Charset charset,
final String boundary,
final List<FormBodyPart> parts) {
super (charset, boundary);
this .parts = parts;
}
@Override
public List<FormBodyPart> getBodyParts() {
return this .parts;
}
/**
* Write the multipart header fields; depends on the style.
*/
@Override
protected void formatMultipartHeader(
final FormBodyPart part,
final OutputStream out) throws IOException {
// For browser-compatible, only write Content-Disposition
// Use content charset
final Header header = part.getHeader();
final MinimalField cd = header.getField(MIME.CONTENT_DISPOSITION);
// The charset used here is the form's charset (this.charset), i.e. the configured one.
writeField(cd, this .charset, out);
final String filename = part.getBody().getFilename();
if (filename != null ) {
final MinimalField ct = header.getField(MIME.CONTENT_TYPE);
// The configured charset is used here as well.
writeField(ct, this .charset, out);
}
}
}
|
RFC6532模式中的formatMultipartHeader方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
/**
 * Multipart form used for {@code HttpMultipartMode.RFC6532}. It writes every header
 * field of a part, always encoded with {@code MIME.UTF8_CHARSET}.
 */
class HttpRFC6532Multipart extends AbstractMultipartForm {
// Body parts exposed through getBodyParts().
private final List<FormBodyPart> parts;
public HttpRFC6532Multipart(
final Charset charset,
final String boundary,
final List<FormBodyPart> parts) {
super (charset, boundary);
this .parts = parts;
}
@Override
public List<FormBodyPart> getBodyParts() {
return this .parts;
}
/**
 * Writes all header fields of {@code part} to {@code out} using UTF-8.
 */
@Override
protected void formatMultipartHeader(
final FormBodyPart part,
final OutputStream out) throws IOException {
// For RFC6532, we output all fields with UTF-8 encoding.
final Header header = part.getHeader();
for ( final MinimalField field: header) {
// UTF-8 is passed explicitly here; the charset handed to the constructor is not used
// in this method.
writeField(field, MIME.UTF8_CHARSET, out);
}
}
}
|
默认模式(STRICT)中的formatMultipartHeader方法
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
|
/**
 * Multipart form used for the default mode ({@code HttpMultipartMode.STRICT}). It writes
 * every header field of a part through the {@code writeField} overload that takes no
 * charset argument.
 */
class HttpStrictMultipart extends AbstractMultipartForm {
// Body parts exposed through getBodyParts().
private final List<FormBodyPart> parts;
public HttpStrictMultipart(
final Charset charset,
final String boundary,
final List<FormBodyPart> parts) {
super (charset, boundary);
this .parts = parts;
}
@Override
public List<FormBodyPart> getBodyParts() {
return this .parts;
}
/**
 * Writes all header fields of {@code part} to {@code out} without passing a charset.
 */
@Override
protected void formatMultipartHeader(
final FormBodyPart part,
final OutputStream out) throws IOException {
// For strict, we output all fields with MIME-standard encoding.
// As the comment above indicates, the default charset is used here, i.e. ASCII
// (see MIME.DEFAULT_CHARSET). NOTE(review): the two-argument writeField overload is
// not shown in this excerpt — confirm that it falls back to MIME.DEFAULT_CHARSET.
final Header header = part.getHeader();
for ( final MinimalField field: header) {
writeField(field, out);
}
}
}
|
MIME类
1
2
3
4
5
6
7
8
9
10
11
|
/**
 * Constants used when writing multipart headers: header field names, encoding tokens,
 * and the default (US-ASCII) and UTF-8 charsets.
 */
public final class MIME {
// Header field names.
public static final String CONTENT_TYPE = "Content-Type" ;
public static final String CONTENT_TRANSFER_ENC = "Content-Transfer-Encoding" ;
public static final String CONTENT_DISPOSITION = "Content-Disposition" ;
// Encoding tokens; presumably values for the Content-Transfer-Encoding header — TODO confirm.
public static final String ENC_8BIT = "8bit" ;
public static final String ENC_BINARY = "binary" ;
/** The default character set to be used, i.e. "US-ASCII" */
public static final Charset DEFAULT_CHARSET = Consts.ASCII;
/** UTF-8 is used for RFC6532 */
public static final Charset UTF8_CHARSET = Consts.UTF_8;
}
|
解决方法
知道了乱码产生的根源,乱码问题也就好解决了,解决方式有两种
设置mode为BROWSER_COMPATIBLE,并设置字符集为UTF-8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
/**
 * Builds the multipart upload entity in BROWSER_COMPATIBLE mode with a UTF-8 charset,
 * so that the part headers (including the file name) are written using UTF-8.
 *
 * @param scenarioId   scenario identifier, sent as the "scenarioId" text field
 * @param groupIds     group identifiers, each sent as a separate "groupIds" text field
 * @param extension    value of the "extension" text field
 * @param fileToUpload file sent as the "fileToUpload" part
 * @return the assembled multipart entity
 */
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,
        File fileToUpload) {
    // Mode and charset first: BROWSER_COMPATIBLE writes the headers with the configured charset.
    final MultipartEntityBuilder multipart = MultipartEntityBuilder.create()
            .setMode(HttpMultipartMode.BROWSER_COMPATIBLE)
            .setCharset(Charset.forName("UTF-8"))
            .addTextBody("scenarioId", scenarioId.toString());

    // One "groupIds" field per group id, in list order.
    for (final String id : groupIds) {
        multipart.addTextBody("groupIds", id);
    }

    // Remaining parts are appended in the same order as before.
    return multipart
            .addTextBody("extension", extension)
            .addPart("fileToUpload", new FileBody(fileToUpload))
            .addTextBody("type", AssetFileTypeEnum.CSV.getName())
            .addTextBody("isSplit", "false")
            .addTextBody("isRefresh", "false")
            .build();
}
|
设置mode为RFC6532(该模式写头部时固定使用UTF-8,无需另外设置字符集)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
/**
 * Builds the multipart upload entity in RFC6532 mode, in which the part headers
 * (including the file name) are written using UTF-8.
 *
 * @param scenarioId   scenario identifier, sent as the "scenarioId" text field
 * @param groupIds     group identifiers, each sent as a separate "groupIds" text field
 * @param extension    value of the "extension" text field
 * @param fileToUpload file sent as the "fileToUpload" part
 * @return the assembled multipart entity
 */
private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,
        File fileToUpload) {
    // RFC6532 mode needs no explicit charset on the builder.
    final MultipartEntityBuilder multipart = MultipartEntityBuilder.create()
            .setMode(HttpMultipartMode.RFC6532)
            .addTextBody("scenarioId", scenarioId.toString());

    // One "groupIds" field per group id, in list order.
    for (final String id : groupIds) {
        multipart.addTextBody("groupIds", id);
    }

    // Remaining parts are appended in the same order as before.
    return multipart
            .addTextBody("extension", extension)
            .addPart("fileToUpload", new FileBody(fileToUpload))
            .addTextBody("type", AssetFileTypeEnum.CSV.getName())
            .addTextBody("isSplit", "false")
            .addTextBody("isRefresh", "false")
            .build();
}
|
以上为个人经验,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/youshounianhua123/article/details/81100778