unit HTMLReplace;
{Search and Replace in HTML files}
interface
uses SysUtils,Dialogs;
procedure ReplaceStringsHTML(FileName,_OldStr,_NewStr:string;Options:TFindOptions);
{this procedure takes an HTML file stored at FileName, searches for all occurrences
of _OldStr and replaces them with the string _NewStr
Options:TFindOptions is only used to determine if replace is case sensitive
The procedure replaces texts interrupted by markup but does not iterate over
subframes. Global variables are used instead of function parameters to increase
the speed of replace procedures.
You are free to use this code. If you find an error please write to bugs(AT)supermemo(.)com
This code is part of SuperMemo 2001 source code
Written at SuperMemo R&D, December 2001
Used in TWeb component derived from TWebBrowser employed in HTML-based incremental reading
Replacing 2000 texts in a 500K file takes about 1 second. The same file takes 11 seconds
to load to Internet Explorer 6.0}
implementation
uses Basic,Files,Dial;
{those units can be removed and simple replacement procedures can be used:
const nl=#13#10
EError = ShowMessage
ReadStringFromTXTFile and WriteStringToTXTFile are simple to write and available on request}
type TMatchRecord=record
StartPos:integer;
MatchPos:byte;
ReplaceStr:string;
end;
{Global variables are used instead of function parameters to increase the speed}
var MatchRecs:array of TMatchRecord;
OldPosit,NewPosit:integer;
MatchRecsNo:byte;
OldText,NewText,OldStr,NewStr:string;
CaseSensitive:boolean;
procedure AddNewMatch;
begin
inc(MatchRecsNo);
SetLength(MatchRecs,MatchRecsNo);
MatchRecs[MatchRecsNo-1].StartPos:=NewPosit-1;
MatchRecs[MatchRecsNo-1].MatchPos:=0;
MatchRecs[MatchRecsNo-1].ReplaceStr:='';
end;
procedure DeleteMatch(MatchNo:byte);
var m:byte;
begin
for m:=MatchNo to MatchRecsNo-1 do
MatchRecs[m-1]:=MatchRecs[m];
dec(MatchRecsNo);
{SetLength(MatchRecsNo) is not used to speed up the process}
end;
procedure ReplaceMatch(MatchNo:byte);
var strg:string;
dif:integer;
begin
NewText:=copy(NewText,1,MatchRecs[MatchNo-1].StartPos-1); {copy the correct part of the new text}
NewText:=NewText+MatchRecs[MatchNo-1].ReplaceStr; {copy the replacement text}
dif:=length(NewStr)-length(OldStr);
if dif>0 then begin {add the rest of new new string if it is longer than the old one}
strg:=copy(NewStr,length(OldStr)+1,dif);
NewText:=NewText+strg;
end;
NewPosit:=length(NewText)+1;
end;
procedure AugmentMatch(MatchNo:byte;ch:char);
begin
with MatchRecs[MatchNo-1] do begin
if MatchPos<=length(NewStr) then
ReplaceStr:=ReplaceStr+NewStr[MatchPos];
if MatchPos=length(OldStr) then begin {full match found}
ReplaceMatch(MatchNo); {replace the match text with NewStr}
DeleteMatch(MatchNo); {delete the successful match}
end;
end;
end;
procedure CheckMatch(MatchNo:byte;ch:char);
var Equal:boolean;
begin
with MatchRecs[MatchNo-1] do begin
inc(MatchPos);
if CaseSensitive then
Equal:=OldStr[MatchPos]=ch
else
Equal:=UpCase(OldStr[MatchPos])=UpCase(ch);
if Equal then
AugmentMatch(MatchNo,ch) {keep on adding chars to a match that is successful}
else
DeleteMatch(MatchNo); {delete a match that failed}
end;
end;
procedure CheckMatches(ch:char);
var m:byte;
Equal:boolean;
begin
NewText:=NewText+ch;
inc(NewPosit);
if CaseSensitive then
Equal:=ch=OldStr[1]
else
Equal:=UpCase(ch)=UpCase(OldStr[1]);
if Equal then
AddNewMatch; {if the first character matches the search string, create a new match}
for m:=MatchRecsNo downto 1 do {DeleteMatch renumbers matches - this is why the downward checkup}
CheckMatch(m,ch); {delete all matches that fail on this character}
end;
procedure AddToMatches(ch:char); {copy character to all match instances}
var m:byte;
begin
for m:=1 to MatchRecsNo do
MatchRecs[m-1].ReplaceStr:=MatchRecs[m-1].ReplaceStr+ch;
NewText:=NewText+ch;
Inc(NewPosit);
end;
procedure PrepareString(var TheString:string);
begin
ReplaceString(TheString,'&','&'); {replace all & with & in TheString}
ReplaceString(TheString,'<','<');
ReplaceString(TheString,'>','>');
end;
procedure ReplaceStringsHTML(FileName,_OldStr,_NewStr:string;Options:TFindOptions);
var InMarkup:boolean;
ch:char;
begin
try
OldStr:=_OldStr;
NewStr:=_NewStr;
PrepareString(OldStr);
PrepareString(NewStr);
OldText:=Files.ReadStringFromTXTFile(FileName); {read the file into a string}
NewText:='';
OldPosit:=1;
NewPosit:=1;
MatchRecsNo:=0;
InMarkup:=false;
CaseSensitive:=(frMatchCase in Options);
while OldPosit<=length(OldText) do begin
ch:=OldText[OldPosit];
if ch='<' then
InMarkup:=true;
if InMarkup then
AddToMatches(ch) {copy markup literally}
else
CheckMatches(ch); {replace texts that match the search string}
if ch='>' then
InMarkup:=false;
inc(OldPosit);
end;
Files.WriteStringToTXTFile(FileName,NewText); {write the string back to the original file}
MatchRecs:=nil;
except
on E:Exception do EError('Error replacing texts in HTML file'+nl+FileName,E);
end;
end;
end.