Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- unit StreamText;
- interface
- uses
- System.SysUtils, System.Classes;
- type
- TBOMMode = (bomNone, bomPresent);
- procedure AssignStream(out AText: TextFile; const AStream: TStream; const ABufferSize: Integer = SizeOf(TTextBuf); const AMode: Integer = fmClosed; const ABOMMode: TBOMMode = bomPresent);
- function DetectEncoding(const AStream: TStream): TEncoding;
- implementation
- const
- CP_ACP = 0;
- CP_UTF8 = 65001;
- function StreamNOPProc(var Rec: TTextRec): Integer;
- begin
- // Does nothing
- Result := 0;
- end;
- function StreamIn(var Rec: TTextRec): Integer;
- begin
- Rec.BufEnd := 0;
- Rec.BufPos := 0;
- Rec.BufEnd := TStream(Rec.Handle).Read(Rec.BufPtr^, Rec.BufSize);
- Result := 0;
- end;
- function StreamOut(var Rec: TTextRec): Integer;
- begin
- if Rec.BufPos <> 0 then
- begin
- TStream(Rec.Handle).WriteBuffer(Rec.BufPtr^, Rec.BufPos);
- Rec.BufPos := 0;
- end;
- Result := 0;
- end;
- function StreamFlush(var Rec: TTextRec): Integer;
- begin
- Result := StreamOut(Rec);
- end;
- type
- TUTF8EncoderStream = class(TStream)
- strict private
- FStream: TStream;
- FEncoding: TEncoding;
- FOriginalPosition: Int64;
- FFixedMultiplier: Integer;
- function CountToRawCount(const ACount, APos: Int64): Int64;
- function RawCountToCount(const ARAWCount, APos: Int64): Int64;
- protected
- function GetSize: Int64; override;
- procedure SetSize(NewSize: Longint); overload; override;
- procedure SetSize(const NewSize: Int64); overload; override;
- public
- function Read(var Buffer; Count: Longint): Longint; override;
- function Write(const Buffer; Count: Longint): Longint; override;
- function Seek(Offset: Longint; Origin: Word): Longint; overload; override;
- function Seek(const Offset: Int64; Origin: TSeekOrigin): Int64; overload; override;
- public
- constructor Create(const AStream: TStream; const AEncoding: TEncoding);
- property Stream: TStream read FStream;
- end;
- procedure FreeBuffer(var Rec: TTextRec);
- begin
- if Assigned(PPointer(@Rec.UserData[2])^) then
- begin
- FreeMem(PPointer(@Rec.UserData[2])^);
- PPointer(@Rec.UserData[2])^ := nil;
- end;
- end;
- function StreamClose(var Rec: TTextRec): Integer;
- var
- Obj: TObject;
- begin
- if TStream(Rec.Handle) is TUTF8EncoderStream then
- begin
- Obj := TObject(Rec.Handle);
- TStream(Rec.Handle) := TUTF8EncoderStream(Rec.Handle).Stream;
- FreeAndNil(Obj);
- end;
- FreeBuffer(Rec);
- Rec.Mode := fmClosed;
- Result := 0;
- end;
- function DetectEncoding(const AStream: TStream): TEncoding;
- var
- Buf: TBytes;
- Sz: Integer;
- begin
- AStream.Position := 0;
- SetLength(Buf, 256);
- Sz := AStream.Read(Pointer(Buf)^, Length(Buf));
- SetLength(Buf, Sz);
- if Length(Buf) > 0 then
- begin
- TEncoding.GetBufferEncoding(Buf, Result, TEncoding.Default);
- AStream.Position := Length(Result.GetPreamble);
- end
- else
- begin
- AStream.Position := 0;
- Result := TEncoding.Default;
- end;
- end;
- function StreamOpen(var Rec: TTextRec): Integer;
- const
- BOM_UTF8: array[0..2] of Byte = ($EF, $BB, $BF);
- var
- Encoding: TEncoding;
- BOMMode: TBOMMode;
- begin
- BOMMode := TBOMMode(Rec.UserData[1]);
- Rec.BufPos := 0;
- Rec.BufEnd := 0;
- Rec.FlushFunc := @StreamNOPProc;
- Rec.CloseFunc := @StreamClose;
- Result := 0;
- case Rec.Mode of
- fmInput: // called by Reset
- begin
- Encoding := DetectEncoding(TStream(Rec.Handle));
- if not (Encoding is TUTF8Encoding) then
- TStream(Rec.Handle) := TUTF8EncoderStream.Create(TStream(Rec.Handle), Encoding);
- Rec.InOutFunc := @StreamIn;
- end;
- fmOutput: // called by Rewrite
- begin
- Rec.InOutFunc := @StreamOut;
- Rec.FlushFunc := @StreamFlush;
- if BOMMode <> bomNone then
- TStream(Rec.Handle).WriteBuffer(BOM_UTF8, SizeOf(BOM_UTF8));
- end;
- fmInOut: // called by Append
- begin
- Encoding := DetectEncoding(TStream(Rec.Handle));
- if not (Encoding is TUTF8Encoding) then
- TStream(Rec.Handle) := TUTF8EncoderStream.Create(TStream(Rec.Handle), Encoding);
- Rec.InOutFunc := @StreamOut;
- Rec.FlushFunc := @StreamFlush;
- end;
- else
- Exit;
- end;
- if Rec.BufSize <= SizeOf(Rec.Buffer) then
- Rec.BufPtr := Rec.Buffer
- else
- begin
- FreeBuffer(Rec);
- Rec.BufPtr := AllocMem(Rec.BufSize);
- PPointer(@Rec.UserData[2])^ := Rec.BufPtr;
- end;
- end;
- procedure AssignStream(out AText: TextFile; const AStream: TStream; const ABufferSize: Integer; const AMode: Integer; const ABOMMode: TBOMMode);
- type
- PTextRec = ^TTextRec;
- var
- Rec: PTextRec;
- begin
- Rec := @AText;
- FillChar(Rec^, SizeOf(TTextRec), 0);
- if ABOMMode <> bomNone then
- AStream.Position := 0;
- TStream(Rec.Handle) := AStream;
- Rec.CodePage := CP_UTF8;
- Rec.Mode := AMode;
- Rec.Flags := tfCRLF * Byte(DefaultTextLineBreakStyle);
- if ABufferSize <= SizeOf(Rec.Buffer) then
- begin
- Rec.BufPtr := Rec.Buffer;
- Rec.BufSize := SizeOf(Rec.Buffer);
- end
- else
- begin
- Rec.BufPtr := nil;
- Rec.BufSize := ABufferSize;
- end;
- Rec.OpenFunc := @StreamOpen;
- Rec.UserData[1] := Ord(ABOMMode);
- StreamOpen(Rec^);
- end;
- { TUTF8EncoderStream }
- constructor TUTF8EncoderStream.Create(const AStream: TStream;
- const AEncoding: TEncoding);
- begin
- inherited Create;
- FStream := AStream;
- FEncoding := AEncoding;
- FOriginalPosition := FStream.Position;
- if AEncoding.IsSingleByte then
- FFixedMultiplier := 1
- else
- if (AEncoding.ClassType = TUnicodeEncoding) or
- (AEncoding.ClassType = TBigEndianUnicodeEncoding) then
- FFixedMultiplier := 2
- else
- Assert(False, 'Not supported');
- end;
- function TUTF8EncoderStream.Read(var Buffer; Count: Integer): Longint;
- var
- Buf: TBytes;
- RAWCount: Integer;
- Pos: Int64;
- UTF8Buf: UTF8String;
- begin
- Pos := FStream.Position;
- RAWCount := CountToRawCount(Count, Pos);
- SetLength(Buf, RAWCount);
- RAWCount := FStream.Read(Pointer(Buf)^, RAWCount);
- SetLength(Buf, RAWCount);
- UTF8Buf := UTF8String(FEncoding.GetString(Buf));
- Move(Pointer(UTF8Buf)^, Buffer, Length(UTF8Buf));
- Result := Length(UTF8Buf);
- end;
- function TUTF8EncoderStream.Write(const Buffer; Count: Integer): Longint;
- var
- UTF8Buf: UTF8String;
- Buf: TBytes;
- begin
- SetLength(UTF8Buf, Count);
- Move(Buffer, Pointer(UTF8Buf)^, Length(UTF8Buf));
- Buf := FEncoding.GetBytes(String(UTF8Buf));
- FStream.WriteBuffer(Pointer(Buf), Length(Buf));
- Result := Length(Buf);
- end;
- function TUTF8EncoderStream.Seek(const Offset: Int64; Origin: TSeekOrigin): Int64;
- begin
- case Origin of
- soBeginning: FStream.Position := CountToRawCount(Offset, 0);
- soCurrent: FStream.Position := FStream.Position + CountToRawCount(Offset, FStream.Position);
- soEnd: FStream.Position := FStream.Size + CountToRawCount(Offset, FStream.Size);
- end;
- Result := RAWCountToCount(FStream.Position, 0);
- end;
- function TUTF8EncoderStream.GetSize: Int64;
- var
- RAWCount: Integer;
- begin
- RAWCount := FStream.Size - FOriginalPosition;
- Result := RAWCountToCount(RAWCount, FOriginalPosition);
- end;
- procedure TUTF8EncoderStream.SetSize(const NewSize: Int64);
- begin
- FStream.Size := CountToRawCount(NewSize, 0);
- end;
- function TUTF8EncoderStream.Seek(Offset: Integer; Origin: Word): Longint;
- var
- Offs: Int64;
- Org: TSeekOrigin;
- begin
- Offs := Offset;
- Org := TSeekOrigin(Origin);
- Result := Seek(Offs, Org);
- end;
- procedure TUTF8EncoderStream.SetSize(NewSize: Integer);
- var
- Sz: Int64;
- begin
- Sz := NewSize;
- SetSize(Sz);
- end;
- function TUTF8EncoderStream.CountToRawCount(const ACount, APos: Int64): Int64;
- begin
- if FFixedMultiplier <> 0 then
- begin
- Result := ACount * FFixedMultiplier;
- Exit;
- end;
- Assert(False, 'Not supported');
- Result := ACount;
- end;
- function TUTF8EncoderStream.RawCountToCount(const ARAWCount, APos: Int64): Int64;
- begin
- if FFixedMultiplier <> 0 then
- begin
- Result := ARAWCount div FFixedMultiplier;
- Exit;
- end;
- Assert(False, 'Not supported');
- Result := ARAWCount;
- end;
- end.
Add Comment
Please, Sign In to add comment