Delphi 2010:如何模拟 Delphi XE TStrings.Encoding 属性?

Delphi 2010 : How to emulate the Delphi XE TStrings.Encoding property?

Delphi XE 在 TStrings 类中添加了 Encoding 属性,用于存储调用 LoadFromFile() 时从 BOM 读取到的编码。

Delphi 2010 没有这个属性,我想模拟它。

我为 TStrings 创建了下面的 class helper。这个 helper 可以工作,但为了获取文件的 BOM,我找到的唯一办法是用 TFileStream 重新打开同一个文件。我想避免这样做,因为 TStrings.LoadFromFile() 已经读取过 BOM 了。

如何让这个 helper 复用已经检测到的 BOM?

unit TestEncodingName_00;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, ExtDlgs;

type
  { Form of the test application. Declares the Memo1 component and the
    FormCreate handler implemented further down in this unit. }
  TForm1 = class(TForm)
    Memo1: TMemo;
    procedure FormCreate(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

{ Class helper for TStrings intended to emulate the TStrings.Encoding
  property. It exposes a read-only property, indexed by file path, that
  yields the name of the encoding as a string. }
TMyStrings = class helper for TStrings
private
  // Getter behind EncodingName; fPath is the path of the file to inspect.
  function GetEncodingName(fPath:string):string;
public
  property EncodingName[fPath:string]:string read GetEncodingName;
end;

var
Form1: TForm1;

implementation

{$R *.dfm}

(* Returns the name of the encoding detected from the first bytes of a file.

   fPath  - path of the file to inspect.
   Result - 'Unicode' when TEncoding.GetBufferEncoding() reports
            TEncoding.Unicode, 'UTF8' when it reports TEncoding.UTF8,
            and 'Default' for everything else.

   Raises whatever TFileStream.Create raises when the file cannot be opened.

   NOTE(review): any other encoding GetBufferEncoding() may report (for
   example a big-endian UTF-16 BOM) is also returned as 'Default' - confirm
   that this is what callers expect. *)
function TMyStrings.GetEncodingName(fPath:string):string;
const
  MaxBomBytes = 4;   // number of leading bytes examined for a BOM
var
  BytesRead: Integer;
  Head: TBytes;
  Detected: TEncoding;
  Source: TFileStream;
begin
  // Open with fmShareDenyWrite: fmOpenRead on its own carries no share flag,
  // which asks for exclusive access, so the open would fail whenever the
  // file is already open elsewhere and would lock other readers out.
  Source := TFileStream.Create(fPath, fmOpenRead or fmShareDenyWrite);
  try
    SetLength(Head, MaxBomBytes);
    BytesRead := Source.Read(Head[0], MaxBomBytes);
    // Files shorter than MaxBomBytes: keep only the bytes that really exist
    // so that the detection never looks at uninitialised padding.
    if BytesRead < MaxBomBytes then
      SetLength(Head, BytesRead);
  finally
    Source.Free;
  end;

  // Passing nil asks GetBufferEncoding() to detect the encoding itself.
  Detected := nil;
  TEncoding.GetBufferEncoding(Head, Detected);

  if Detected = TEncoding.Unicode then
    Result := 'Unicode'
  else if Detected = TEncoding.UTF8 then
    Result := 'UTF8'
  else
    Result := 'Default';
end;

{ Loads the sample file into Memo1 and appends a line showing the encoding
  name reported by the EncodingName helper property. }
procedure TForm1.FormCreate(Sender: TObject);
const
  SampleFile = 'Sample UTF8.txt';
var
  DetectedName: string;
begin
  (* Contents of the sample file:
       Ā ā Ă ă
       Ρ Σ Τ Υ
       ぁ あ ぃ
  *)
  Memo1.Lines.LoadFromFile(SampleFile);

  // After the load TStrings has already seen the BOM, but there is no known
  // way to get at it from here. The helper therefore opens the same file a
  // second time just to read the BOM again - the step this code would like
  // to avoid.
  DetectedName := Memo1.Lines.EncodingName[SampleFile];

  Memo1.Lines.Add(#13#10'Encoding : ' + DetectedName);
end;
end. 

首先,检测 BOM 编码的是 LoadFromStream(),而不是 LoadFromFile()。LoadFromFile() 只是用 TFileStream 打开文件,然后调用 LoadFromStream()。

在 Delphi 2009 和 2010 中,检测到的 BOM 编码不会保存在任何您可以访问的位置,这正是 XE 通过新增 Encoding 属性解决的问题。编码只是 LoadFromStream() 内部的一个局部变量,用于在解析之前把文件数据解码为 UnicodeString,并在 LoadFromStream() 退出时被丢弃。您无法改变这种行为。

因此,唯一的解决方案是手动加载文件,以便自己检测其 BOM。理想情况下,您可以在派生类中重写(override)LoadFromStream(),但您无法让 TMemo.Lines 使用自定义类,而 class helper 也不能重写虚方法。

但是,您可以从 TStringList 派生一个自定义类并重写 LoadFromStream(),用它自己加载文件,然后将该 TStringList Assign() 给 TMemo.Lines。例如:

unit TestEncodingName_00;

interface

uses
  Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
  Dialogs, StdCtrls, ExtDlgs;

type
  { Form of the test application. Declares the Memo1 component and the
    FormCreate handler implemented further down in this unit. }
  TForm1 = class(TForm)
    Memo1: TMemo;
    procedure FormCreate(Sender: TObject);
  private
    { Private declarations }
  public
    { Public declarations }
  end;

  { TStringList descendant that records the encoding used by its
    LoadFromStream() override and exposes it through the read-only
    Encoding property. }
  TMyStringList = class(TStringList)
  private
    // Backing field of the Encoding property; assigned in LoadFromStream().
    fEncoding: TEncoding;
  public
    { The single-parameter LoadFromStream(Stream: TStream) simply
    calls LoadFromStream(Stream: TStream; Encoding: TEncoding) with
    the Encoding parameter set to nil, so you only have to override
    that version of LoadFromStream()... }
    procedure LoadFromStream(Stream: TStream; Encoding: TEncoding); override;
    // Encoding recorded by the most recent LoadFromStream() call.
    property Encoding: TEncoding read fEncoding;
  end;

var
  Form1: TForm1;

implementation

{$R *.dfm}

(* Reads everything from the current position of Stream to its end,
   determines the encoding via TEncoding.GetBufferEncoding(), remembers it
   in fEncoding and replaces the list contents with the decoded text.

   Stream   - source of the raw bytes.
   Encoding - encoding handed to GetBufferEncoding(); the value it holds
              after that call is the one stored and used for decoding.

   Modelled on the LoadFromStream() implementation found in XE and later,
   with these differences:

   1. The encoding is stored by plain reference. XE+ goes through a
      TEncoding.Clone() method when the given Encoding is not one of the
      standard RTL encodings (i.e. it is a custom user class), but Clone()
      does not exist in D2009/2010.

   2. XE+ passes the TStrings.DefaultEncoding property to
      TEncoding.GetBufferEncoding() as the result to use when no BOM is
      found; neither that property nor that parameter exists in D2009/2010.

   3. The stock TStrings.LoadFromStream() does not check for a size of 0
      (empty file) before dereferencing the buffer it hands to
      Stream.Read(), which is a runtime crash waiting to happen. This
      version only reads when there is at least one byte. *)
procedure TMyStringList.LoadFromStream(Stream: TStream; Encoding: TEncoding);
var
  Raw: TBytes;
  ByteCount: Integer;   // bytes remaining in the stream
  BomLen: Integer;      // value returned by GetBufferEncoding()
begin
  BeginUpdate;
  try
    ByteCount := Stream.Size - Stream.Position;
    SetLength(Raw, ByteCount);

    // Raw[0] must not be touched when the buffer is empty.
    if ByteCount > 0 then
      Stream.Read(Raw[0], ByteCount);

    BomLen := TEncoding.GetBufferEncoding(Raw, Encoding);
    fEncoding := Encoding;

    // Decode everything that follows the first BomLen bytes.
    SetTextStr(Encoding.GetString(Raw, BomLen, Length(Raw) - BomLen));
  finally
    EndUpdate;
  end;
end;

{ Loads the sample file through TMyStringList so that the encoding is
  available afterwards, copies the text into Memo1 and appends a blank line
  followed by a line naming the encoding. }
procedure TForm1.FormCreate(Sender: TObject);
var
  EncName : string;
  List: TMyStringList;
begin
  List := TMyStringList.Create;
  try
    List.LoadFromFile('Sample UTF8.txt');

    // Map the recorded TEncoding instance to a display name; anything other
    // than Unicode or UTF8 is shown as 'Default'.
    if List.Encoding = TEncoding.Unicode then
      EncName := 'Unicode'
    else if List.Encoding = TEncoding.UTF8 then
      EncName := 'UTF8'
    else
      EncName := 'Default';

    Memo1.Lines.Assign(List);
    // TStrings.Add() requires a string argument, so the blank separator
    // line has to be added as an explicit empty string.
    Memo1.Lines.Add('');
    Memo1.Lines.Add('Encoding : ' + EncName);
  finally
    List.Free;
  end;
end;

end.