Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- unit Parser;
- {$mode objfpc}
- {$h+}
- interface
- uses
- Classes, Contnrs, Drupal;
- function ParseXML(Strings: TStrings): TObjectList;
- implementation
- uses
- SysUtils;
- type
- TSpan = record
- Content: string;
- X: Integer;
- end;
- TLine = array of TSpan;
- const
- BLOCK_JUNK_LENGTH = 2;
- BLOCK_HEAD_LENGTH = 21;
- DOLLAR_RATE = 8;
- var
- Data: array of TLine;
- Offset: Integer;
- Strings: TStrings;
- function CreateStruct(const Line: TLine): TStringList; forward;
- function IsLineEnded(X: Integer): Boolean; forward;
- function IsSpanCollapsible(X: Integer): Boolean; forward;
- function IsSpanEnded(): Boolean; forward;
- function ParseLine(): TLine; forward;
- function ParseLine(Count: Integer): TLine; forward;
- function ParseSpan(): TSpan; forward;
- procedure Print(const Line: TLine); forward;
- procedure PushLine(const Line: TLine); forward;
- function ReadC(Node: string): Char; forward;
- function ReadX(Node: string): Integer; forward;
- function Translate(const Line: TLine): TContent; forward;
- { CreateStruct }
- function CreateStruct(const Line: TLine): TStringList;
- var
- Diff0, Diff1: Integer;
- Head: TLine;
- I, J: Integer;
- Match: Integer;
- begin
- Result := TStringList.Create();
- Head := Parser.Data[1];
- for I := 0 to Length(Line) - 1 do
- begin
- // <hardcode>
- if Line[I].X = 117 then
- begin
- Result.Values['Район'] := Line[I].Content;
- continue;
- end;
- if (Line[I].X > 430) and (Line[I].X < 470) then
- begin
- Result.Values['Номер'] := Line[I].Content;
- continue;
- end;
- // </hardcode>
- Match := 0;
- Diff0 := Abs(Line[I].X - Head[Match].X);
- for J := 1 to Length(Head) - 1 do
- begin
- Diff1 := Abs(Line[I].X - Head[J].X);
- if (Diff1 < Diff0) then
- begin
- Diff0 := Diff1;
- Match := J;
- end;
- end;
- Result.Values[Head[Match].Content] := Line[I].Content;
- end;
- end;
- { IsLineEnded }
- function IsLineEnded(X: Integer): Boolean;
- var
- I: Integer absolute Parser.Offset;
- S: TStrings absolute Parser.Strings;
- begin
- if (S[I][2] = 's') then
- Result := (ReadX(S[I]) = X)
- else
- Result := (S[I][2] = '/') and (S[I][3] = 'p');
- end;
- { IsSpanCollapsible }
- function IsSpanCollapsible(X: Integer): Boolean;
- var
- I: Integer absolute Parser.Offset;
- S: TStrings absolute Parser.Strings;
- begin
- if (I + 1 < S.Count) and (S[I + 1][2] = 's') then
- Result := ReadX(S[I + 1]) = X
- else
- Result := false;
- end;
- { IsSpanEnded }
- function IsSpanEnded(): Boolean;
- var
- I: Integer absolute Parser.Offset;
- S: TStrings absolute Parser.Strings;
- begin
- Result := (S[I][2] = '/') and (S[I][3] = 's');
- end;
- { ParseXML }
- function ParseXML(Strings: TStrings): TObjectList;
- var
- I: Integer;
- begin
- Parser.Offset := 0;
- Parser.Strings := Strings;
- PushLine(ParseLine(BLOCK_JUNK_LENGTH));
- PushLine(ParseLine(BLOCK_HEAD_LENGTH));
- while Parser.Offset < Parser.Strings.Count {hack to skip last page tag} - 1 {/hack} do
- PushLine(ParseLine());
- Result := TObjectList.Create();
- for I := 2 to Length(Parser.Data) - 1 do
- Result.Add(Translate(Parser.Data[I]));
- end;
- { ParseLine }
- function ParseLine(): TLine;
- var
- I: Integer absolute Parser.Offset;
- S: TStrings absolute Parser.Strings;
- J: Integer = 1;
- begin
- SetLength(Result, 1);
- Result[0] := ParseSpan();
- while I < S.Count do
- begin
- SetLength(Result, J + 1);
- Result[J] := ParseSpan();
- if (J = 20) or IsLineEnded(Result[0].X) then
- break;
- Inc(J);
- end;
- end;
- function ParseLine(Count: Integer): TLine;
- var
- I: Integer;
- begin
- SetLength(Result, Count);
- for I := 0 to Count - 1 do
- Result[I] := ParseSpan();
- end;
- {
- ParseSpan
- Считывает спан из документа, а затем перемещает курсор
- к следующему спану или в конец документа.
- }
- function ParseSpan(): TSpan;
- var
- I: Integer absolute Parser.Offset;
- S: TStrings absolute Parser.Strings;
- begin
- Result.Content := '';
- while I < S.Count do
- begin
- if S[I][2] = 'c' then
- Result.Content := Result.Content + ReadC(S[I])
- else
- if S[I][2] = 's' then
- Result.X := ReadX(S[I])
- else
- if IsSpanEnded() then
- if IsSpanCollapsible(Result.X) then
- Inc(I)
- else
- break;
- Inc(I);
- end;
- Inc(I);
- end;
- { Print }
- procedure Print(const Line: TLine);
- var
- I: Integer;
- begin
- WriteLn('line {');
- for I := 0 to Length(Line) - 1 do
- begin
- WriteLn(' span {');
- WriteLn(' content: ''', Line[I].Content, ''';');
- WriteLn(' x: ', Line[I].X, ';');
- WriteLn(' }');
- end;
- WriteLn('}');
- WriteLn();
- end;
- { PushLine }
- procedure PushLine(const Line: TLine);
- var
- I: Integer;
- begin
- I := Length(Data);
- if (I <= 2) or ((I > 2) and (Data[1][0].X <> Line[0].X)) then
- begin
- SetLength(Data, I + 1);
- Data[I] := Line;
- end;
- end;
- { ReadC }
- function ReadC(Node: string): Char;
- begin
- Delete(Node, 1, Pos('c=', Node) + 2);
- Node := Copy(Node, 1, Pos('"', Node) - 1);
- if Length(Node) > 1 then
- Result := WideChar(StrToInt('$' + Copy(Node, 4, Length(Node) - 4)))
- else
- Result := WideChar(Node[1]);
- end;
- { ReadX }
- function ReadX(Node: string): Integer;
- begin
- Delete(Node, 1, Pos('bbox', Node) + 5);
- Result := StrToInt(Copy(Node, 1, Pos(' ', Node) - 1));
- end;
- { Translate }
- function GetNumber(S: string): Integer;
- begin
- Result := StrToIntDef(S, 0);
- if Result = 0 then
- Result := -1;
- end;
- function GetDistrict(S: TStrings): TContentDistrict;
- var
- D: string;
- begin
- D := S.Values['Район'];
- if D = 'Голосеевский' then
- exit(cdGol);
- if D = 'Дарницкий' then
- exit(cdDar);
- if D = 'Деснянский' then
- exit(cdDes);
- if D = 'Днепровский' then
- exit(cdDne);
- if D = 'Оболонский' then
- exit(cdObo);
- if D = 'Печерский' then
- exit(cdPec);
- if D = 'Подольский' then
- exit(cdPod);
- if D = 'Святошинский' then
- exit(cdSvy);
- if D = 'Соломенский' then
- exit(cdSol);
- if D = 'Шевченковский' then
- exit(cdShe);
- Result := cdOther;
- end;
- function GetAddress(S: TStrings): string;
- var
- A, B: string;
- begin
- Result := '';
- if GetDistrict(S) = cdOther then
- Result := S.Values['Район'];
- A := S.Values['Улица'];
- if A <> '' then
- if Result = '' then
- Result := A
- else
- Result := Result + ', ' + A;
- B := S.Values['№ дома'];
- if B <> '' then
- if Result = '' then
- Result := B
- else
- Result := Result + ', ' + B;
- end;
- function GetKind(S: TStrings): TContentKind;
- begin
- if S.Values['Комнат'] = 'д' then
- exit(ckHouse);
- if S.Values['Планировка'] = 'к' then
- exit(ckRoom);
- Result := ckFlat;
- end;
- function GetPhone(S: TStrings): string;
- begin
- Result := S.Values['Номер'];
- if Length(Result) = 7 then
- Result := '44' + Result;
- if Length(Result) > 9 then
- Result := Copy(Result, Length(Result) - 8, 9);
- Insert('-', Result, 3);
- Insert('-', Result, 7);
- Insert('-', Result, 10);
- end;
- function GetPrice(S: TStrings): Integer;
- begin
- Result := StrToIntDef(S.Values['Цена'], -1);
- if (Result <> -1) and (S.Values['Валюта'] = '$') then
- Result := Result * DOLLAR_RATE;
- end;
- function GetPriceKind(S: TStrings): TContentPriceKind;
- begin
- Result := cpkMonthly; // уточнить!
- end;
- function GetRooms(S: TStrings): Integer;
- begin
- if GetKind(S) = ckFlat then
- Result := GetNumber(S.Values['Комнат'])
- else
- Result := -1;
- end;
- function Translate(const Line: TLine): TContent;
- var
- Struct: TStringList;
- begin
- Struct := CreateStruct(Line);
- Result := TContent.Create();
- Result.Address := UTF8Encode(GetAddress(Struct));
- Result.AreaKitchen := GetNumber(Struct.Values['Кухня']);
- Result.AreaResidential := GetNumber(Struct.Values['Жилая']);
- Result.Body := UTF8Encode(Struct.Values['Примечание']);
- Result.District := GetDistrict(Struct);
- Result.Kind := GetKind(Struct);
- Result.Level := GetNumber(Struct.Values['Этаж']);
- Result.Levels := GetNumber(Struct.Values['Этажность']);
- Result.MiscFridge := (Struct.Values['Холодильник'] = '+');
- Result.MiscFurniture := (Struct.Values['Мебель'] = '+');
- Result.MiscPhone := (Struct.Values['Телефон'] = '+');
- Result.MiscTV := (Struct.Values['Телевизор'] = '+');
- Result.Phone := GetPhone(Struct);
- Result.Price := GetPrice(Struct);
- Result.PriceKind := GetPriceKind(Struct);
- Result.Rooms := GetRooms(Struct);
- Struct.Free();
- end;
- initialization
- SetLength(Data, 0);
- end.
Add Comment
Please, Sign In to add comment