char* buf = new char[ length+1 ];
buf[0] = 0;
if ( fread( buf, length, 1, file ) != 1 ) {
delete [] buf;
SetError( TIXML_ERROR_OPENING_FILE, 0, 0, TIXML_ENCODING_UNKNOWN );
return false;
}
// Process the buffer in place to normalize new lines. (See comment above.)
// Copies from the 'p' to 'q' pointer, where p can advance faster if
// a newline-carriage return is hit.
//
// Wikipedia:
// Systems based on ASCII or a compatible character set use either LF (Line feed, '\n', 0x0A, 10 in decimal) or
// CR (Carriage return, '\r', 0x0D, 13 in decimal) individually, or CR followed by LF (CR+LF, 0x0D 0x0A)...
// * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others
// * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS
// * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9
const char* p = buf; // the read head
char* q = buf; // the write head
const char CR = 0x0d;
const char LF = 0x0a;
buf[length] = 0;
while( *p ) {
assert( p < (buf+length) );
assert( q <= (buf+length) );
assert( q <= p );
if ( *p == CR ) {
*q++ = LF;
p++;
if ( *p == LF ) { // check for CR+LF (and skip LF)
p++;
}
}
else {
*q++ = *p++;
}
}
assert( q <= (buf+length) );
*q = 0;
Parse( buf, 0, encoding );
delete [] buf;
return !Error();
}
16 个解决方案
#1
CR是#13回车,LF是#10换行
//判断p处的值是否和CR相等
if ( *p == CR ) {
//如果相等,那么q地址处的值=LF,然后q地址加一
*q++ = LF;
//p地址加一
p++;
//如果p处的值=LF
if ( *p == LF ) { // check for CR+LF (and skip LF)
//p地址加一
p++;
}
}
else {
//q处值=p处的值,然后q和p地址分别加一
*q++ = *p++;
}
#2
就是要么你自己输入cr + lf 要么你输入cr 系统内部自己生成lf这就是为什么有的时候'\0'会有结束加换行的结果
#3
#4
cr是换行 cf是空格吧。这程序是不是测试字符串的。
#5
CR是#13回车,LF是#10换行
#6
我知道它们的意思,但是我看不懂这段代码。
#7
if ( *p == CR ) {
*q++ = LF;
p++;
if ( *p == LF ) { // check for CR+LF (and skip LF)
p++;
}
}
else {
*q++ = *p++;
}
就是把 \r\n 转成 \n
把windows的换行转成linux等的换行。代码上的注释写了什么意思了
这个功能类似于 linux里的 dos2unix 命令。
*q++ = LF;
p++;
if ( *p == LF ) { // check for CR+LF (and skip LF)
p++;
}
}
else {
*q++ = *p++;
}
就是把 \r\n 转成 \n
把windows的换行转成linux等的换行。代码上的注释写了什么意思了
这个功能类似于 linux里的 dos2unix 命令。
#8
// * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others linux等系统以 LF 作为换行符
// * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS windows等系统以 CRLF 作为换行
// * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9 Mac等系统以 CR 作为换行
// * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS windows等系统以 CRLF 作为换行
// * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9 Mac等系统以 CR 作为换行
#9
Hi,我对这段代码的理解如下:
如果是CR+LF,最后应该变成LF+LF,怎么会是CR+LF变成LF呢?
不明白。
#10
没人回答????
#11
没有人回答????
#12
楼主不妨再分析一下……
进入下一次循环的时候,第二个LF会变成什么?
#13
~~~~~~~~~~~~~~~~
不同系统其换行符不一样,那一段代码就是兼容(把 \r \r\n 都统一成 \n)
这样处理就可以跨平台了....
不知道你还纠结着什么
不同系统其换行符不一样,那一段代码就是兼容(把 \r \r\n 都统一成 \n)
这样处理就可以跨平台了....
不知道你还纠结着什么
#14
我知道它的目的是为了把'\r\r\n'变成'\n',可是看来看去这段代码并不能够达到这样的效果,我自己做了试验,以下代码可以直接编译通过,大家试试:
#include <iostream>
#include <assert.h>
using namespace std;
// Byte values for carriage return and line feed.
const char CR = 0x0d;
const char LF = 0x0a;
// Number of payload bytes placed in the test buffer (the terminating NUL is extra).
const int length = 5;
// Trace program for the in-place newline-normalization loop.
// Fills a buffer with 'H' CR CR LF 'Y', runs the loop over it, and prints the
// buffer plus the bytes under the read head (*p) and write head (*q) before
// and after every step. The numbered labels ("1->buf" ... "6->buf") identify
// which point of the loop produced each line of output.
int main()
{
char buf[length+1];
buf[0] = 'H';
buf[1] = CR; buf[2] = CR; buf[3] = LF; buf[4] = 'Y';
buf[length] = 0; // NUL terminator: the loop stops when the read head reaches it
const char* p = buf; // read head
char* q = buf; // write head
int i = 0; // iteration counter, used only in the trace output
while( *p ) {
// Invariants: both heads stay inside the buffer and the write head never
// gets ahead of the read head.
assert( p < (buf+length) );
assert( q <= (buf+length) );
assert( q <= p );
if ( *p == CR ) {
// Trace points 1/2: state before and after emitting LF in place of the CR.
cout << "1->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = LF; // every CR produces exactly one LF at the write head
p++;
cout << "2->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl << endl;
cout << "------------------------------------" << endl << endl;
if ( *p == LF ) {
// Trace points 3/4: the CR was followed by LF (a CR+LF pair); the LF is
// skipped without writing anything, so the pair yields a single LF.
cout << "3->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
p++;
cout << "4->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
}
else {
// Trace points 5/6: any byte other than CR is copied from read head to write head.
cout << "5->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = *p++;
cout << "6->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
i++;
}
// NOTE: nothing writes a NUL at the write head after the loop, so the bytes
// between q and the original terminator keep their old values; printing buf
// therefore still shows those stale trailing bytes.
}
输出结果,红色部分表示刚开始和结束的字符串:
G:\devcpp_test>test_2.exe
5->buf: H
Y
i:0, *p:72, *q:72
----------------
6->buf: H
Y
i:0, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:1, *p:13, *q:13
----------------
2->buf: H
Y
i:1, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:2, *p:13, *q:13
----------------
2->buf: H
Y
i:2, *p:10, *q:10
------------------------------------
3->buf: H
Y
i:2, *p:10, *q:10
----------------
4->buf: H
Y
i:2, *p:89, *q:10
------------------------------------
5->buf: H
Y
i:3, *p:89, *q:10
----------------
6->buf: H
YY
i:3, *p:0, *q:89
------------------------------------
#include <iostream>
#include <assert.h>
using namespace std;
// Byte values for carriage return and line feed.
const char CR = 0x0d;
const char LF = 0x0a;
// Number of payload bytes placed in the test buffer (the terminating NUL is extra).
const int length = 5;
// Trace program for the in-place newline-normalization loop.
// Fills a buffer with 'H' CR CR LF 'Y', runs the loop over it, and prints the
// buffer plus the bytes under the read head (*p) and write head (*q) before
// and after every step. The numbered labels ("1->buf" ... "6->buf") identify
// which point of the loop produced each line of output.
int main()
{
char buf[length+1];
buf[0] = 'H';
buf[1] = CR; buf[2] = CR; buf[3] = LF; buf[4] = 'Y';
buf[length] = 0; // NUL terminator: the loop stops when the read head reaches it
const char* p = buf; // read head
char* q = buf; // write head
int i = 0; // iteration counter, used only in the trace output
while( *p ) {
// Invariants: both heads stay inside the buffer and the write head never
// gets ahead of the read head.
assert( p < (buf+length) );
assert( q <= (buf+length) );
assert( q <= p );
if ( *p == CR ) {
// Trace points 1/2: state before and after emitting LF in place of the CR.
cout << "1->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = LF; // every CR produces exactly one LF at the write head
p++;
cout << "2->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl << endl;
cout << "------------------------------------" << endl << endl;
if ( *p == LF ) {
// Trace points 3/4: the CR was followed by LF (a CR+LF pair); the LF is
// skipped without writing anything, so the pair yields a single LF.
cout << "3->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
p++;
cout << "4->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
}
else {
// Trace points 5/6: any byte other than CR is copied from read head to write head.
cout << "5->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = *p++;
cout << "6->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
i++;
}
// NOTE: nothing writes a NUL at the write head after the loop, so the bytes
// between q and the original terminator keep their old values; printing buf
// therefore still shows those stale trailing bytes.
}
输出结果,红色部分表示刚开始和结束的字符串:
G:\devcpp_test>test_2.exe
5->buf: H
Y
i:0, *p:72, *q:72
----------------
6->buf: H
Y
i:0, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:1, *p:13, *q:13
----------------
2->buf: H
Y
i:1, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:2, *p:13, *q:13
----------------
2->buf: H
Y
i:2, *p:10, *q:10
------------------------------------
3->buf: H
Y
i:2, *p:10, *q:10
----------------
4->buf: H
Y
i:2, *p:89, *q:10
------------------------------------
5->buf: H
Y
i:3, *p:89, *q:10
----------------
6->buf: H
YY
i:3, *p:0, *q:89
------------------------------------
#15
p跳过LF,指向LF后的一个位置,q指向LF。进入下个循环。
如果p指向的不是CR,那么就把*p的值赋给*q,否则给*q赋LF。
CRLF怎么会变成LFLF呢?CRLFCR 这样的情况才会出LFLF
#16
你自己写的测试也说明一切....
buf[0] = 'H'; buf[1] = CR; buf[2] = CR; buf[3] = LF; buf[4] = 'Y';
H 换行 换行 Y
而最后输出
6->buf: H (换行)
(换行)
Y Y <<====这里多一个Y是因为你最后还没有 *q = 0; 操作
i:3, *p:0, *q:89
------------------------------------
难道你认为 \r, \r\n 这里应是一个换行符??
//换行符有 \r, \n, \r\n 这属于一个换行符
//而 \r\r, \n\r, \n\n...这些都是两个换行符
#1
CR是#13回车,LF是#10换行
//判断p处的值是否和CR相等
if ( *p == CR ) {
//如果相等,那么q地址处的值=LF,然后q地址加一
*q++ = LF;
//p地址加一
p++;
//如果p处的值=LF
if ( *p == LF ) { // check for CR+LF (and skip LF)
//p地址加一
p++;
}
}
else {
//q处值=p处的值,然后q和p地址分别加一
*q++ = *p++;
}
#2
就是要么你自己输入cr + lf 要么你输入cr 系统内部自己生成lf这就是为什么有的时候'\0'会有结束加换行的结果
#3
#4
cr是换行 cf是空格吧。这程序是不是测试字符串的。
#5
CR是#13回车,LF是#10换行
#6
我知道它们的意思,但是我看不懂这段代码。
#7
if ( *p == CR ) {
*q++ = LF;
p++;
if ( *p == LF ) { // check for CR+LF (and skip LF)
p++;
}
}
else {
*q++ = *p++;
}
就是把 \r\n 转成 \n
把windows的换行转成linux等的换行。代码上的注释写了什么意思了
这个功能类似于 linux里的 dos2unix 命令。
*q++ = LF;
p++;
if ( *p == LF ) { // check for CR+LF (and skip LF)
p++;
}
}
else {
*q++ = *p++;
}
就是把 \r\n 转成 \n
把windows的换行转成linux等的换行。代码上的注释写了什么意思了
这个功能类似于 linux里的 dos2unix 命令。
#8
// * LF: Multics, Unix and Unix-like systems (GNU/Linux, AIX, Xenix, Mac OS X, FreeBSD, etc.), BeOS, Amiga, RISC OS, and others linux等系统以 LF 作为换行符
// * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS windows等系统以 CRLF 作为换行
// * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9 Mac等系统以 CR 作为换行
// * CR+LF: DEC RT-11 and most other early non-Unix, non-IBM OSes, CP/M, MP/M, DOS, OS/2, Microsoft Windows, Symbian OS windows等系统以 CRLF 作为换行
// * CR: Commodore 8-bit machines, Apple II family, Mac OS up to version 9 and OS-9 Mac等系统以 CR 作为换行
#9
Hi,我对这段代码的理解如下:
如果是CR+LF,最后应该变成LF+LF,怎么会是CR+LF变成LF呢?
不明白。
#10
没人回答????
#11
没有人回答????
#12
楼主不妨再分析一下……
进入下一次循环的时候,第二个LF会变成什么?
#13
~~~~~~~~~~~~~~~~
不同系统其换行符不一样,那一段代码就是兼容(把 \r \r\n 都统一成 \n)
这样处理就可以跨平台了....
不知道你还纠结着什么
不同系统其换行符不一样,那一段代码就是兼容(把 \r \r\n 都统一成 \n)
这样处理就可以跨平台了....
不知道你还纠结着什么
#14
我知道它的目的是为了把'\r\r\n'变成'\n',可是看来看去这段代码并不能够达到这样的效果,我自己做了试验,以下代码可以直接编译通过,大家试试:
#include <iostream>
#include <assert.h>
using namespace std;
// Byte values for carriage return and line feed.
const char CR = 0x0d;
const char LF = 0x0a;
// Number of payload bytes placed in the test buffer (the terminating NUL is extra).
const int length = 5;
// Trace program for the in-place newline-normalization loop.
// Fills a buffer with 'H' CR CR LF 'Y', runs the loop over it, and prints the
// buffer plus the bytes under the read head (*p) and write head (*q) before
// and after every step. The numbered labels ("1->buf" ... "6->buf") identify
// which point of the loop produced each line of output.
int main()
{
char buf[length+1];
buf[0] = 'H';
buf[1] = CR; buf[2] = CR; buf[3] = LF; buf[4] = 'Y';
buf[length] = 0; // NUL terminator: the loop stops when the read head reaches it
const char* p = buf; // read head
char* q = buf; // write head
int i = 0; // iteration counter, used only in the trace output
while( *p ) {
// Invariants: both heads stay inside the buffer and the write head never
// gets ahead of the read head.
assert( p < (buf+length) );
assert( q <= (buf+length) );
assert( q <= p );
if ( *p == CR ) {
// Trace points 1/2: state before and after emitting LF in place of the CR.
cout << "1->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = LF; // every CR produces exactly one LF at the write head
p++;
cout << "2->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl << endl;
cout << "------------------------------------" << endl << endl;
if ( *p == LF ) {
// Trace points 3/4: the CR was followed by LF (a CR+LF pair); the LF is
// skipped without writing anything, so the pair yields a single LF.
cout << "3->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
p++;
cout << "4->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
}
else {
// Trace points 5/6: any byte other than CR is copied from read head to write head.
cout << "5->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = *p++;
cout << "6->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
i++;
}
// NOTE: nothing writes a NUL at the write head after the loop, so the bytes
// between q and the original terminator keep their old values; printing buf
// therefore still shows those stale trailing bytes.
}
输出结果,红色部分表示刚开始和结束的字符串:
G:\devcpp_test>test_2.exe
5->buf: H
Y
i:0, *p:72, *q:72
----------------
6->buf: H
Y
i:0, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:1, *p:13, *q:13
----------------
2->buf: H
Y
i:1, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:2, *p:13, *q:13
----------------
2->buf: H
Y
i:2, *p:10, *q:10
------------------------------------
3->buf: H
Y
i:2, *p:10, *q:10
----------------
4->buf: H
Y
i:2, *p:89, *q:10
------------------------------------
5->buf: H
Y
i:3, *p:89, *q:10
----------------
6->buf: H
YY
i:3, *p:0, *q:89
------------------------------------
#include <iostream>
#include <assert.h>
using namespace std;
// Byte values for carriage return and line feed.
const char CR = 0x0d;
const char LF = 0x0a;
// Number of payload bytes placed in the test buffer (the terminating NUL is extra).
const int length = 5;
// Trace program for the in-place newline-normalization loop.
// Fills a buffer with 'H' CR CR LF 'Y', runs the loop over it, and prints the
// buffer plus the bytes under the read head (*p) and write head (*q) before
// and after every step. The numbered labels ("1->buf" ... "6->buf") identify
// which point of the loop produced each line of output.
int main()
{
char buf[length+1];
buf[0] = 'H';
buf[1] = CR; buf[2] = CR; buf[3] = LF; buf[4] = 'Y';
buf[length] = 0; // NUL terminator: the loop stops when the read head reaches it
const char* p = buf; // read head
char* q = buf; // write head
int i = 0; // iteration counter, used only in the trace output
while( *p ) {
// Invariants: both heads stay inside the buffer and the write head never
// gets ahead of the read head.
assert( p < (buf+length) );
assert( q <= (buf+length) );
assert( q <= p );
if ( *p == CR ) {
// Trace points 1/2: state before and after emitting LF in place of the CR.
cout << "1->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = LF; // every CR produces exactly one LF at the write head
p++;
cout << "2->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl << endl;
cout << "------------------------------------" << endl << endl;
if ( *p == LF ) {
// Trace points 3/4: the CR was followed by LF (a CR+LF pair); the LF is
// skipped without writing anything, so the pair yields a single LF.
cout << "3->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
p++;
cout << "4->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
}
else {
// Trace points 5/6: any byte other than CR is copied from read head to write head.
cout << "5->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "----------------" << endl;
*q++ = *p++;
cout << "6->buf: " << buf << endl;
cout << "i:" << i << ", *p:" << int(*p) << ", *q:" << int(*q) << endl;
cout << "------------------------------------" << endl << endl;
}
i++;
}
// NOTE: nothing writes a NUL at the write head after the loop, so the bytes
// between q and the original terminator keep their old values; printing buf
// therefore still shows those stale trailing bytes.
}
输出结果,红色部分表示刚开始和结束的字符串:
G:\devcpp_test>test_2.exe
5->buf: H
Y
i:0, *p:72, *q:72
----------------
6->buf: H
Y
i:0, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:1, *p:13, *q:13
----------------
2->buf: H
Y
i:1, *p:13, *q:13
------------------------------------
1->buf: H
Y
i:2, *p:13, *q:13
----------------
2->buf: H
Y
i:2, *p:10, *q:10
------------------------------------
3->buf: H
Y
i:2, *p:10, *q:10
----------------
4->buf: H
Y
i:2, *p:89, *q:10
------------------------------------
5->buf: H
Y
i:3, *p:89, *q:10
----------------
6->buf: H
YY
i:3, *p:0, *q:89
------------------------------------
#15
p跳过LF,指向LF后的一个位置,q指向LF。进入下个循环。
如果p指向的不是CR,那么就把*p的值赋给*q,否则给*q赋LF。
CRLF怎么会变成LFLF呢?CRLFCR 这样的情况才会出LFLF
#16
你自己写的测试也说明一切....
buf[0] = 'H'; buf[1] = CR; buf[2] = CR; buf[3] = LF; buf[4] = 'Y';
H 换行 换行 Y
而最后输出
6->buf: H (换行)
(换行)
Y Y <<====这里多一个Y是因为你最后还没有 *q = 0; 操作
i:3, *p:0, *q:89
------------------------------------
难道你认为 \r, \r\n 这里应是一个换行符??
//换行符有 \r, \n, \r\n 这属于一个换行符
//而 \r\r, \n\r, \n\n...这些都是两个换行符