The ZDetectUTF8Encoding of the ZEncoding unit has a bug

The official forum for ZeosLib 7.3. Report problems, ask for help, post proposals for the new version of ZeosLib 7.3/v8
Quick Info:
-We made two new drivers: odbc(raw and unicode version) and oledb
-GUID domain/field-defined support for FB
-extended error infos of Firebird
-performance ups are still in queue
In the future some more features will arrive, so stay tuned and don't hesitate to help
Post Reply
zhuyl
Junior Boarder
Junior Boarder
Posts: 26
Joined: 22.11.2021, 09:09

The ZDetectUTF8Encoding of the ZEncoding unit has a bug

Post by zhuyl »

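The ZDetectUTF8Encoding function of the ZEncoding unit has a bug: SizeOf(PCardinal) should be changed to SizeOf(Cardinal). PCardinal is a pointer type, so on 64-bit targets SizeOf(PCardinal) is 8, while the code only checks one Cardinal (4 bytes) at a time and then skips 8 bytes.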

{ Classifies the bytes in Source[0..Len-1].

  @param Source pointer to the first byte to examine (may be nil)
  @param Len    number of bytes to examine
  @returns etUSASCII if Source is nil, Len is 0, or every byte is below $80;
           etUTF8    if the buffer contains bytes >= $80 and all of them form
                     well-formed multi-byte sequences as checked below;
           etANSI    otherwise (invalid lead byte, bad continuation byte, or
                     a sequence truncated by the end of the buffer). }
function ZDetectUTF8Encoding(Source: PAnsiChar; Len: NativeUInt): TEncodeType;
var
  c: Byte;
  EndPtr: PAnsiChar;
begin
  Result := etUSASCII;
  if (Source = nil) or (Len = 0) then Exit;

  // Last position from which a whole Cardinal (4 bytes) can still be read.
  EndPtr := Source + Len - SizeOf(Cardinal);

  // Skip the leading US-ASCII part four bytes at a time.
  while Source <= EndPtr do
  begin
    // Any high bit set in the quad means a non US-ASCII byte is inside it.
    if PCardinal(Source)^ and $80808080 <> 0 then Break;
    Inc(Source, SizeOf(Cardinal));
  end;
  // From here on EndPtr is the real (exclusive) end of the buffer.
  Inc(EndPtr, SizeOf(Cardinal));

  // Finish the US-ASCII prefix byte by byte.
  while Source < EndPtr do
  begin
    if Byte(Source^) >= $80 then Break;
    Inc(Source);
  end;

  // If all characters are US-ASCII, done.
  if Source = EndPtr then Exit;

  while Source < EndPtr do
  begin
    c := Byte(Source^);
    case c of
      $00..$7F: // ASCII7
        // Fast path: test a whole quad again. The test reads one Cardinal
        // (4 bytes), so both the guard and the step must use
        // SizeOf(Cardinal). SizeOf(PCardinal) is the size of a pointer, which
        // is 8 on 64-bit targets and made the scanner skip 4 unchecked bytes.
        if (EndPtr - Source > SizeOf(Cardinal))
           and (PCardinal(Source)^ and $80808080 = 0) then
          Inc(Source, SizeOf(Cardinal))
        else
          Inc(Source);

      $C2..$DF: // non-overlong 2-byte
        if (Source+1 < EndPtr)
           and (Byte((Source+1)^) in [$80..$BF]) then
          Inc(Source, 2)
        else
          Break;

      $E0: // 3-byte, excluding overlongs
        if (Source+2 < EndPtr)
           and (Byte((Source+1)^) in [$A0..$BF])
           and (Byte((Source+2)^) in [$80..$BF]) then
          Inc(Source, 3)
        else
          Break;

      $E1..$EF: // straight 3-byte
        // NOTE(review): $ED followed by $A0..$BF (encoded UTF-16 surrogates)
        // is accepted by this branch; confirm whether that is intended.
        if (Source+2 < EndPtr)
           and (Byte((Source+1)^) in [$80..$BF])
           and (Byte((Source+2)^) in [$80..$BF]) then
          Inc(Source, 3)
        else
          Break;

      $F0: // planes 1-3
        if (Source+3 < EndPtr)
           and (Byte((Source+1)^) in [$90..$BF])
           and (Byte((Source+2)^) in [$80..$BF])
           and (Byte((Source+3)^) in [$80..$BF]) then
          Inc(Source, 4)
        else
          Break;

      $F1..$F3: // 4-byte, any continuation bytes
        if (Source+3 < EndPtr)
           and (Byte((Source+1)^) in [$80..$BF])
           and (Byte((Source+2)^) in [$80..$BF])
           and (Byte((Source+3)^) in [$80..$BF]) then
          Inc(Source, 4)
        else
          Break;

      $F4: // 4-byte, second byte limited to $80..$8F
        if (Source+3 < EndPtr)
           and (Byte((Source+1)^) in [$80..$8F])
           and (Byte((Source+2)^) in [$80..$BF])
           and (Byte((Source+3)^) in [$80..$BF]) then
          Inc(Source, 4)
        else
          Break;
    else
      // $80..$C1 and $F5..$FF are never valid lead bytes here.
      Break;
    end;
  end;

  // Reaching the end without a Break means every sequence validated.
  if Source = EndPtr then Result := etUTF8
  else Result := etANSI;
end;
Post Reply