(*************************************************************************
 *  CoreLowU.pas                                                         *
 *  Vladimr Slvik 2007-10                                              *
 *  Delphi 7 Personal                                                    *
 *  cp1250                                                               *
 *                                                                       *
 *  Core routines for Shades :                                           *
 *    color replacing etc.                                               *
 *                                                                       *
 *  -additional libraries: Graphics32                                    *
 *************************************************************************)

{
  This unit contains code that is time-critical for the whole program, and
  I tried to make it as fast as possible.

  Thus it is also almost always subject to optimization changes between versions,
  so don't rely on the assumption that this code really IS fast too much. It
  always appears things can be done a bit faster, and sometimes it also appears
  that I am a complete idiot ;-)

  Alas, Niklaus Wirth and his strong typing were mostly sacrificed to pointer
  unholy goodness (or goddess?) in the process :-(

  As a rule of thumb, anything in assembly should have a pascal counterpart for
  future intended multiplatform / other compiler compatibility (Mac, Lazarus?).

  And by the way - optimizing for skipping (aka "considering" first) background
  is the strongest possible optimization here, since it forms around 95% of area
  (or in other words memory) for plain vehicle sprites, and with some
  descriptions around it jumps down to cca 75% which is still way better than any
  hashing or other insanities. No stats for houses which will have a lot lower
  numbers, but then the check must be done anyway before treating generic colour,
  so it might as well come in first.

  Lots of speed-up thinking and design can be bypassed by finding common bits
  for the particular kind of colour and testing for them first.

  Final hint for asm here is, don't overdo it. Less and cheaper instructions = good.
  That's all what really matters, not code beauty or extensibility. The colours
  might as well be set in stone, the possible pool in Simutrans is exhausted, so
  this is more or less final setup. The code revolves around that a lot, so don't
  just copy and paste - think if you are in the same situation and can afford
  committing to such one way ticket.

  If you fiddle with code here and extend or create new - never forget to unlock
  FPU with EMMS when done, after using functions from GR32_Blend!
}

unit CoreLowU;

{$INCLUDE ..\Switches.inc}
{t default -}

interface

uses Classes, Graphics,
     GR32, GR32_Blend,
     OptionU, ConstStrU, ConfigU;

//------------------------------------------------------------------------------

procedure ReplaceColor(const Target: TBitmap32; const Old, New: TColor32; const Area: TRect);
// Replaces one color by another.

procedure MakeTransparent(const Target: TBitmap32; const Color: TColor32; const Area: TRect);
// Makes one color fully transparent.

function GetImagePart(const Source: TBitmap32; const Place: TPoint;
  const TargetMask: TBitmap32; var BoundingRect: TRect): Boolean;
// retrieve a part of picture separated by background color

procedure InvertMask(const Mask: TBitmap32);
// invert a mask obtained by calling GetImagePart 

procedure TileBitmap32To(const Source: TBitmap32; const Target: TBitmap32);
// tile SOURCE into TARGET

function IsDarkColor(const Color: TColor32): Boolean;
function IsPlayerPriColor(const Color: TColor32): Boolean;
function IsPlayerSecColor(const Color: TColor32): Boolean;
function IsPlayerColor(const Color: TColor32): Boolean;
function IsSpecialColor(const Color: TColor32): Boolean;
// Test if color belongs to given set. Set "special" is everything - background

function IsBackgroundColor(const Color: TColor32): Boolean;
// tests for e7ffff omitting the first byte

procedure FloodFill32(const Target: TBitmap32; const Where: TPoint;
  const FillColor: TColor32; var UndoBounds: TRect; const Diagonal: Boolean);
// surface floodfill for GR32, with diagonal spread and recording operation area

procedure CleanByMask(const Mask, Target: TBitmap32; const DoDark, DoPlayer: Boolean);
// clean special colours; Mask is a bitmap where areas to process are white
procedure CleanWhole(const Target: TBitmap32; const DoDark, DoPlayer: Boolean);
// clean special colours, same as above but everything

procedure MakeHighlighted(const Target: TBitmap32; const Options: TSdiEdOptions;
  const Area: TRect);
procedure MakeDarkened(const Target: TBitmap32; const Options: TSdiEdOptions;
  const Area: TRect);
// transform picture to desired form using special colors


procedure MakeBlended(const Target: TBitmap32; const BlendWeight: Cardinal;
  const BlendColor: TColor32);
// and without special colors - independent of mode

function HighlightedColor(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
function DarkenedColor(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
// functions for converting colours

function HighlightedColorDumb(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
function DarkenedColorDumb(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
// convert color *without* distinguishing specials

// These color functions are not optimized as they should NOT be called per-pixel,
// that's what the above ones are for...

//==============================================================================
implementation

uses SysUtils, Math,
     CalcUtilU, Types, CoreTypeU;

//------------------------------------------------------------------------------

const PictBackClr: TColor32 = BackClrTransparent;
      MaskBackClr: TColor32 = BackClrOpaque;
      PieceClr: TColor32 = $000000FF;
      // piece is fully transparent and regardless what color
      ToDoClr: TColor32 = $00FF0000;
      (* to do must be :
         1) different from both other ones
         2) fully transparent, because that's the border around which separates
         "pieces" *)

//------------------------------------------------------------------------------

function HighlightedColorDumb(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
var C: TColor32;
begin
  C:= Clr;
  with Options.HighlightMode do Result:= CombineReg(C, BlendColor, Weight);
  EMMS;
end;

//------------------------------------------------------------------------------

function HighlightedColor(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
begin
  with Options.HighlightMode do if IsDarkColor(Clr) then begin
    Result:= DarkHlC;
  end else if IsPlayerPriColor(Clr) then begin
    Result:= PlayerPriHlC;
  end else if IsPlayerSecColor(Clr) then begin
    Result:= PlayerSecHlC;
  end else Result:= HighlightedColorDumb(Clr, Options)
end;

//------------------------------------------------------------------------------

function DarkenedColorDumb(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
begin
  with Options.DarkMode do Result:= CombineReg(Clr, BlendColor, Weight);
  EMMS;
end;

//------------------------------------------------------------------------------

function DarkenedColor(const Clr: TColor32; const Options: TSdiEdOptions): TColor32;
var i: Integer;
begin
  Result:= 0;
  for i:= ScaAllStart to ScaAllEnd do if SpecColorArray[i].Normal = Clr then begin
    Result:= SpecColorArray[i].Special;
    Break;
  end;
  if Result = 0 then Result:= DarkenedColorDumb(Clr, Options);
end;

//------------------------------------------------------------------------------

procedure ReplaceScanline(const ATarget: PColor32Array;
  const AStart, ALength: DWord; const AOld, ANew: TColor32); cdecl;
{$IFNDEF NO_ASSEMBLY}
// assembly version
begin
  asm
    pushad

    mov ebx, AOld
    mov edx, ANew

    mov edi, AStart
    add edi, ALength
    shl edi, 2
    // state: edi = (length + start) * sizeof(TColor32)
    add edi, ATarget
    // state: edi = Ptr(end_of_scanline_part)

    mov esi, AStart
    shl esi, 2
    // state: esi = start * sizeof(TColor32)
    add esi, ATarget
    // state: esi = Ptr(start_of_scanline_part)

    @start:

    mov eax, [edi]
    cmp eax, ebx
    jne @loop_end
    mov [edi], edx

    @loop_end:

    sub edi, 4
    // step back by SizeOf(TColor32) to previous pixel
    cmp edi, esi
    jnb @start
    // repeat if start of scanline part is not reached

    popad
  end;
  EMMS;
end;
{$ELSE}
// native version
//   (const ATarget: PColor32Array;
//    const AStart, ALength: DWord; const AOld, ANew: TColor32)
var i, StopPtr: PColor32;
begin
  StopPtr:= PColor32(DWord(ATarget) + AStart * SizeOf(TColor32));
  i:= PColor32(DWord(StopPtr) + ALength * SizeOf(TColor32));
  while DWord(i) >= DWord(StopPtr) do begin
    if i^ = AOld then
        i^:= ANew;
    Dec(i); // automatically takes care of sizeof(TColor32)!
  end;
  EMMS;
end;
{$ENDIF}


procedure ReplaceColor(const Target: TBitmap32; const Old, New: TColor32; const Area: TRect);
var y, Width, Start: Integer;
begin
  Width:= RectWidth(Area) - 1; // last row & column do not belong to the rectangle!
  Start:= Area.Left;
  for y:= Area.Top to Area.Bottom - 1 do begin
    ReplaceScanline(Target.ScanLine[y], Start, Width, Old, New);
  end;
end;


//------------------------------------------------------------------------------

procedure TransparentScanline(const ATarget: PColor32Array;
  const AStart, ALength: DWord; const AColor: TColor32); cdecl;
{$IFNDEF NO_ASSEMBLY}
// assembly version
begin
  asm
    pushad

    mov ebx, AColor
    mov edx, $00FFFFFF
    and ebx, edx

    mov edi, AStart
    add edi, ALength
    shl edi, 2
    // state: edi = (length + start) * sizeof(TColor32)
    add edi, ATarget
    // state: edi = Ptr(end_of_scanline_part)

    mov esi, AStart
    shl esi, 2
    // state: esi = start * sizeof(TColor32)
    add esi, ATarget
    // state: esi = Ptr(start_of_scanline_part)

    @start:

    mov eax, [edi]
    and eax, edx
    cmp eax, ebx
    jne @loop_end
    mov [edi], ebx

    @loop_end:

    sub edi, 4
    // step back by SizeOf(TColor32) to previous pixel
    cmp edi, esi
    jnb @start
    // repeat if start of scanline part is not reached

    popad
  end;
  EMMS;
end;
{$ELSE}
// native version
//   (const ATarget: PColor32Array;
//    const AStart, ALength: DWord; const AColor: TColor32)
var i, StopPtr: PColor32;
    NewColor: TColor32;
begin
  NewColor:= AColor and $00FFFFFF;
  StopPtr:= PColor32(DWord(ATarget) + AStart * SizeOf(TColor32));
  i:= PColor32(DWord(StopPtr) + ALength * SizeOf(TColor32));
  while DWord(i) >= DWord(StopPtr) do begin
    if (i^ and $00FFFFFF) = NewColor then
        i^:= NewColor;
    Dec(i); // automatically takes care of sizeof(TColor32)!
  end;
  EMMS;
end;
{$ENDIF}

procedure MakeTransparent(const Target: TBitmap32; const Color: TColor32; const Area: TRect);
var y, Width, Start: Integer;
begin
  Width:= RectWidth(Area) - 1; // last row & column do not belong to the rectangle!
  Start:= Area.Left;
  for y:= Area.Top to Area.Bottom - 1 do begin
    TransparentScanline(Target.ScanLine[y], Start, Width, Color);
  end;
end;

//------------------------------------------------------------------------------

function GetImagePart(const Source: TBitmap32; const Place: TPoint;
  const TargetMask: TBitmap32; var BoundingRect: TRect): Boolean;
(* Detect the object on given position and create a mask that will allow painless
   cutting it out of the whole picture. Main code idea is almost identical with
   floodfill, but this time we're painting into another bitmap. *)
var MaxX, MaxY,
    X, Y,
    LenX, LenY,
    CntX, CntY,
    ValX, ValY,
    HiX, HiY, LoX, LoY: Integer;
    SrchRadius, i, StackPos: Integer;
    BPict, BMask: PColor32Array;
    PieceBounds: TRect;
    Continue: Boolean;

  //............................................................................
  procedure Check;
  begin
    StackPos:= (Y * LenX) + X;
    if (BPict[StackPos] <> PictBackClr) and (BMask[StackPos] = MaskBackClr) then
        BMask[StackPos]:= ToDoClr; // should be processed
  end;
  //............................................................................
begin
  Result:= False;
  BPict:= Source.Bits;
  TargetMask.SetSizeFrom(Source);
  BMask:= TargetMask.Bits;
  // "aliases" - let's have pointers for inner parts instead of WITHs etc.

  with Place, Source do Continue:=
    (PixelS[X, Y] <> PictBackClr)
    and PtInRect(BoundsRect, Place);
  if not Continue then raise EAbort.Create('Nothing there.');

  PieceBounds:= Source.BoundsRect;
  with PieceBounds do begin
    i:= Bottom; // i "hijacked" for swapping variables
    Bottom:= Top;
    Top:= i;
    i:= Left;
    Left:= Right;
    Right:= i;
  end;
  with Source do begin
    LenX:= Width;
    LenY:= Height;
    MaxX:= LenX - 1;
    MaxY:= LenY - 1;
  end;
  // "localize" variables - loop is time extensive, every bit of speed is
  // probably needed. (?)
  TargetMask.DrawMode:= dmBlend;
  TargetMask.Clear(MaskBackClr);
  // clear mask
  X:= Place.X;
  Y:= Place.Y;
  // set the starting point
  // * initialization done


  while X <> -1 do begin

    BMask[(Y * LenX) + X]:= PieceClr; // set to already visited and processed
    with PieceBounds do begin
      if Top > Y then Top:= Y;
      if Bottom < Y then Bottom:= Y;
      if Right < X then Right:= X;
      if Left > X then Left:= X;
    end;
    // update detection bounding rectangle

    // Now check the 8 surrounding pixels
    // first the 4 "straight" ones
    inc(X); // X + 1
    if (X < LenX) then Check;
    // If it's not outside, try to mark it as another checkpoint.
    dec(X); dec(X); // X - 1
    if (X > -1) then Check;
    inc(X); // restore X to original value
    inc(Y); // Y + 1
    if (Y < LenY) then Check;
    dec(Y); dec(Y); // Y - 1
    if (Y > -1) then Check;
    inc(Y); // restore Y to original value
    // and the 4 diagonally around.
    inc(X);
    inc(Y); // X + 1, Y + 1
    if (X < LenX) and (Y < LenY) then Check;
    dec(Y); dec(Y); // X + 1, Y - 1
    if (X < LenX) and (Y > -1) then Check;
    dec(X); dec(X); // X - 1, Y - 1
    if (X > -1) and (Y > -1) then Check;
    inc(Y); inc(Y); // X - 1, Y + 1
    if (X > -1) and (Y < LenY) then Check;  // (Y < LenX) ????
    inc(X);
    dec(Y); // back to original values
    // check of surroundings finished, now look for next "workpoint"

    ValX:= X;
    ValY:= Y;
    X:= -1;
    // Invalid value, will cause loop end; is changed when next point is found.
    // If no point is found and -1 stays, work is over.
    SrchRadius:= MaxIntValue([ValX, ValY, MaXX - ValX, MaxY - ValY]);
    // the largest distance between "workpoint" and picture border

    for i:= 1 to SrchRadius do begin // search incrementally in squares around
      // a square is formed by 4 lines - 4 loops

      if X = -1 then begin // if nothing was found (redundant for 1st loop) ...
        // Top: Left -> Right
        CntY:= ValY - i; // set one coordinate as fixed
        if CntY > -1 then begin // only if line intersects with picture
          LoX:= ValX - i;
          HiX:= ValX + i; // calculate start and end of line
          if LoX < 0 then LoX:= 0;
          if HiX > MaxX then HiX:= MaxX; // crop parts outside picture
          for CntX:= LoX to HiX do if BMask[(CntY * LenX) + CntX] = ToDoClr then begin
            // iterate along line and search for 1
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X = -1 then begin // ^ ... but must be here
        // Right: Top -> Bottom
        CntX:= ValX + i;
        if CntX < LenX then begin
          LoY:= ValY - i;
          HiY:= ValY + i;
          if LoY < 0 then LoY:= 0;
          if HiY > MaxY then HiY:= MaxY;
          for CntY:= LoY to HiY do if BMask[(CntY * LenX) + CntX] = ToDoClr then begin
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X = -1 then begin
        // Bottom: Right -> Left
        CntY:= ValY + i;
        if CntY < LenY then begin
          HiX:= ValX + i;
          LoX:= ValX - i;
          if LoX < 0 then LoX:= 0;
          if HiX > MaxX then HiX:= MaxX;
          for CntX:= LoX to HiX do if BMask[(CntY * LenX) + CntX] = ToDoClr then begin
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X = -1 then begin
        // Left: Bottom -> Top
        CntX:= ValX - i;
        if CntX > -1 then begin
          HiY:= ValY + i;
          LoY:= ValY - i;
          if LoY < 0 then LoY:= 0;
          if HiY > MaxY then HiY:= MaxY;
          for CntY:= LoY to HiY do if BMask[(CntY * LenX) + CntX] = ToDoClr then begin
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X <> -1 then Break; // If some of the inner loops made a hit, cancel
      // the enclosing square-iterating loop, too.
    end;

    // If nothing was found, floodfill is finished; X stays -1 and thus main
    // loop stops.
  end;
  Result:= True;
  BoundingRect:= GrowRect(PieceBounds);;
end;

//------------------------------------------------------------------------------

procedure TileBitmap32To(const Source, Target: TBitmap32);
var i, j: Integer;
begin
  (* Set cycle count to as many loops as needed to cover the whole canvas
     with background picture. *)
  if uInt64(Target.Width) * Target.Height * Source.Height * Source.Width > 0 then
    (* Do it only if both images are filled (not empty). Multiplying needs very
       very large type to avoid overflows! Think megapixels squared... 32 bits
       are certainly not enough. Alternative implementation for arbitrary numbers
       would use multiplication of Sign() calls on each number. But other mistakes
       than empty images should crash this function, not make a silent pass! *)
    for i:= 0 to DivPlus(Target.Width, Source.Width) do
      for j:= 0 to DivPlus(Target.Height, Source.Height) do
        Target.Draw(i * Source.Width, j * Source.Height, Source);
end;

//------------------------------------------------------------------------------

function IsDarkColor(const Color: TColor32): Boolean;
{$IFNDEF NO_ASSEMBLY}
asm
  // eax - result
  // edx - color
  mov edx, eax // preserve input
  mov eax, False // loop pre-condition
  or edx, $FF000000 // make sure alpha does not matter

  mov ecx, edx
  and ecx, $FF010101
  cmp ecx, $FF010101
  jne @end // pre-check for common bits
  // this version 12 bytes, dynamic ecx setting with xor-not-inc-shl 15 bytes :(

  cmp edx, $FF6B6B6B
  je @set
  cmp edx, $FF9B9B9B
  je @set
  cmp edx, $FF6B6B6B
  je @set
  cmp edx, $FFB3B3B3
  je @set
  cmp edx, $FFC9C9C9
  je @set
  cmp edx, $FFDFDFDF
  je @set
  cmp edx, $FF57656F
  je @set
  cmp edx, $FF7F9BF1
  je @set
  cmp edx, $FFFFFF53
  je @set
  cmp edx, $FFFF211D
  je @set
  cmp edx, $FF01DD01
  je @set
  cmp edx, $FFE3E3FF
  je @set
  cmp edx, $FFC1B1D1
  je @set
  cmp edx, $FF4D4D4D
  je @set
  cmp edx, $FFFF017F
  je @set
  cmp edx, $FF0101FF
  jne @end
  @set:
  mov eax, True
  @end:
end;
{$ELSE}
var i: Integer;
begin
  Result:= False;
  if (Color and $FF010101) = $FF010101 then for i:= ScaDarkStart to ScaDarkEnd do
    // pre-check common bits
    if Color = SpecColorArray[i].Normal then begin
      Result:= True;
      Break;
    end;
end;
{$ENDIF}

//------------------------------------------------------------------------------

function IsPlayerPriColor(const Color: TColor32): Boolean;
{$IFNDEF NO_ASSEMBLY}
asm
  push ecx
  // eax - result
  // edx - color
  mov edx, eax // move input from eax to edx
  mov eax, False // now eax is already playing role of output
  or edx, $FF000000
  mov ecx, edx
  or ecx, $FFFDFFFF
  cmp ecx, $FFFDFFFF
  jne @end
  // init + pre-check as for night

  cmp edx, $FF244B67
  je @set
  cmp edx, $FF395E7C
  je @set
  cmp edx, $FF4C7191
  je @set
  cmp edx, $FF6084A7
  je @set
  cmp edx, $FF7497BD
  je @set
  cmp edx, $FF88ABD3
  je @set
  cmp edx, $FF9CBEE9
  je @set
  cmp edx, $FFB0D2FF
  jne @end
  @set:
  mov eax, True
  @end:
  pop ecx
end;
{$ELSE}
var i: Integer;
begin
  Result:= False;
  if (Color or $FFFDFFFF) = $FFFDFFFF then
      for i:= ScaPlayerPriStart to ScaPlayerPriEnd do
      if Color = SpecColorArray[i].Normal then begin
    Result:= True;
    Break;
  end;
end;
{$ENDIF}

//------------------------------------------------------------------------------

function IsPlayerColor(const Color: TColor32): Boolean;
{$IFNDEF NO_ASSEMBLY}
asm
  push ecx
  // eax - input & output register
  // ecx - color
  mov ecx, eax
  call IsPlayerPriColor
  cmp eax, True
  je @set
  mov eax, ecx
  call IsPlayerSecColor
  @set:
  pop ecx
end;
{$ELSE}
var i: Integer;
begin
  Result:= False;
  for i:= ScaPlayerPriStart to ScaPlayerSecEnd do
      if Color = SpecColorArray[i].Normal then begin
    Result:= True;
    Break;
  end;
end;
{$ENDIF}

//------------------------------------------------------------------------------

function IsPlayerSecColor(const Color: TColor32): Boolean;
{$IFNDEF NO_ASSEMBLY}
asm
  push ecx
  // eax - result
  // edx - color
  mov edx, eax
  mov eax, False
  or edx, $FF000000
  mov ecx, edx
  or ecx, $FFFFFF0F
  cmp ecx, $FFFFFF0F
  jne @end
  // init + pre-check as for night

  cmp edx, $FF7B5803
  je @set
  cmp edx, $FF8E6F04
  je @set
  cmp edx, $FFA18605
  je @set
  cmp edx, $FFB49D07
  je @set
  cmp edx, $FFC6B408
  je @set
  cmp edx, $FFD9CB0A
  je @set
  cmp edx, $FFECE20B
  je @set
  cmp edx, $FFFFF90D
  jne @end
  @set:
  mov eax, True
  @end:
  pop ecx
end;
{$ELSE}
var i: Integer;
begin
  Result:= False;
  if (Color or $FFFFFF0F) = $FFFFFF0F then
      for i:= ScaPlayerPriStart to ScaPlayerPriEnd do
      if Color = SpecColorArray[i].Normal then begin
    Result:= True;
    Break;
  end;
end;
{$ENDIF}

//------------------------------------------------------------------------------

function IsSpecialColor(const Color: TColor32): Boolean;
{$IFNDEF NO_ASSEMBLY}
asm
  // eax - input & output register
  // ecx - color
  mov ecx, eax // backup
  call IsBackgroundColor
  cmp eax, True
  je @set
  mov eax, ecx
  call IsPlayerPriColor
  cmp eax, True
  je @set
  mov eax, ecx
  call IsPlayerSecColor
  cmp eax, True
  je @set
  mov eax, ecx
  call IsDarkColor
  @set:
end;
{$ELSE}
var i: Integer;
begin
  Result:= False;
  for i:= ScaAllStart to ScaAllEnd - 1 do // omit background
    if Color = SpecColorArray[i].Normal then begin
      Result:= True;
      Break;
    end;
end;
{$ENDIF}

//------------------------------------------------------------------------------

function IsBackgroundColor(const Color: TColor32): Boolean;
begin
  Result:= (Color and $00FFFFFF) = $00E7FFFF;
end;

//------------------------------------------------------------------------------

procedure FloodFill32(const Target: TBitmap32; const Where: TPoint;
  const FillColor: TColor32; var UndoBounds: TRect; const Diagonal: Boolean);
(*
  FloodFill

  Idea: I have another "array" which has 0 where nothing should be done and 1
  where processing should take place. The highest loop just takes a set of
  coordinates, puts color there, takes care of undo things, looks at nearby
  pixels, and if they are suitable, marks them in the paralell array as todo (1)
  and lets another loop inside it find next point for working.

  The nested loop searches for 1s in a pattern which can be described as circling
  around widening squares - somewhat straight spiral, in fact. Visually:

              A
              | widening
              |                          --------------> etc.
            +-----------+                A ---------->
            | +-------+ |                | A ------> |
            | | +---+ | |                | | A /-> | |
   <------- | | | O | | | ------->       | | | O | | |
   widening | | +---+ | | widening       | | < --V | |
            | +-------+ |                | <-------V |
            +-----------+                <-----------V
              | widening
              |
              V

  Each of the 4 lines that form one square are again one nested loop (2nd level).

  ! The direction of arows does not follow real program loops

  This should be the most effective way when the center of spiral is in last
  modified point; the probability that one of its surrounding points is marked
  as todo should be on average quite good. The spiral pattern is best for finding
  these near points, because it "enlarges" in all 4 directions evenly.

*)
var FillClr, OldClr: TColor32;
    Stack: array of Byte; // "paralell array"
    MaxX, MaxY,
    X, Y,
    LenX, LenY,
    CntX, CntY,
    ValX, ValY,
    HiX, HiY, LoX, LoY: Integer;
    SrchRadius, i, StackPos: Integer;
    B: PColor32Array;
    //Diagonal: Boolean;
    (* explanation of variables:
      Max? - maximal possible
      Len? - size (Max + 1)
      Cnt? - counter for for 2nd level loops
      Val? - backup for finding loops
      Hi?, Lo? - 2nd level loops (for i:= Lo? to Hi?)
      ? - position for next loop iteration
      SrchRadius - maximal distance to search in
      i - main finding (square) loop counter
      StackPos - X,Y converted into one dimensional array position
    *)
begin
  with Where do OldClr:= Target.PixelS[X, Y];
  FillClr:= FillColor;
  if OldClr = FillClr then raise EAbort.Create('fill with the same color!');
  UndoBounds:= Target.BoundsRect;
  with UndoBounds do begin
    Y:= Bottom; // Y hijacked for swapping variables
    Bottom:= Top;
    Top:= Y;
    Y:= Left;
    Left:= Right;
    Right:= Y;
  end;
  // Make it so that the rectangle is "inverted" - this is a safe starting value.
  with Target do begin
    SetLength(Stack, Width * Height);
    B:= Bits;
    LenX:= Width;
    LenY:= Height;
    MaxX:= LenX - 1;
    MaxY:= LenY - 1;
  end; // "localize" variables - loop is time extensive, every bit of speed is
  // probably needed.
  Stack[Where.X + (Where.Y * LenX)]:= 1;
  // set the starting point as "to do"
  X:= Where.X;
  Y:= Where.Y;
  // initialization done

  while X <> -1 do with Target do begin

    Stack[X + (Y * LenX)]:= 2; // now is (will be) already visited -> 2
    PixelS[X, Y]:= FillClr; // ... put color ...
    with UndoBounds do begin
      if Top > Y then Top:= Y;
      if Bottom < Y then Bottom:= Y;
      if Right < X then Right:= X;
      if Left > X then Left:= X;
    end; // ... and remember to update change bounding rectangle

    // Now check the 4 surrounding pixels.
    inc(X); // X + 1
    StackPos:= X + (Y * LenX);
    if (X < LenX) and (B[StackPos] = OldClr) then Stack[StackPos]:= 1;
    // If it's not outside, try to mark it as another checkpoint.
    dec(X, 2); // X - 1
    StackPos:= X + (Y * LenX);
    if (X > -1) and (B[StackPos] = OldClr) then Stack[StackPos]:= 1;
    inc(X); // restore X to original value
    inc(Y); // Y + 1
    StackPos:= X + (Y * LenX);
    if (Y < LenY) and (B[StackPos] = OldClr) then Stack[StackPos]:= 1;
    dec(Y, 2); // Y - 1
    StackPos:= X + (Y * LenX);
    if (Y > -1) and (B[StackPos] = OldClr) then Stack[StackPos]:= 1;
    inc(Y); // restore Y to original value

    // and the 4 diagonally around
    if Diagonal then begin
      inc(X);
      inc(Y);
      StackPos:= X + (Y * LenX); // X + 1, Y + 1
      if (X < LenX) and (Y < LenY) and (B[StackPos] = OldClr) then
        Stack[StackPos]:= 1;
      dec(Y, 2);
      StackPos:= X + (Y * LenX); // X + 1, Y - 1
      if (X < LenX) and (Y > -1) and (B[StackPos] = OldClr) then
        Stack[StackPos]:= 1;
      dec(X, 2);
      StackPos:= X + (Y * LenX); // X - 1, Y - 1
      if (X > -1) and (Y > -1) and (B[StackPos] = OldClr) then
        Stack[StackPos]:= 1;
      inc(Y, 2);
      StackPos:= X + (Y * LenX); // X - 1, Y + 1
      if (X > -1) and (Y < LenX) and (B[StackPos] = OldClr) then
        Stack[StackPos]:= 1;
      inc(X);
      dec(Y);
    end;

    ValX:= X;
    ValY:= Y;
    X:= -1; // Invalid value, will cause loop end; is changed when next point
    // is found.
    SrchRadius:= MaxIntValue([ValX, ValY, MaXX - ValX, MaxY - ValY]);


    for i:= 1 to SrchRadius do begin // search incrementally in squares around
      // a square is formed by 4 lines - 4 loops

      if X = -1 then begin // if nothing was found (redundant for 1st loop) ...
        // Top: Left -> Right
        CntY:= ValY - i; // set one coordinate as fixed
        if CntY > -1 then begin // only if line intersects with picture
          LoX:= ValX - i;
          HiX:= ValX + i; // calculate start and end of line
          if LoX < 0 then LoX:= 0;
          if HiX > MaxX then HiX:= MaxX; // crop parts outside picture
          for CntX:= LoX to HiX do if Stack[CntX + (CntY * LenX)] = 1 then begin
            // iterate along line and search for 1
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X = -1 then begin // ^ ... but must be here
        // Right: Top -> Bottom
        CntX:= ValX + i;
        if CntX < LenX then begin
          LoY:= ValY - i;
          HiY:= ValY + i;
          if LoY < 0 then LoY:= 0;
          if HiY > MaxY then HiY:= MaxY;
          for CntY:= LoY to HiY do if Stack[CntX + (CntY * LenX)] = 1 then begin
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X = -1 then begin
        // Bottom: Right -> Left
        CntY:= ValY + i;
        if CntY < LenY then begin
          HiX:= ValX + i;
          LoX:= ValX - i;
          if LoX < 0 then LoX:= 0;
          if HiX > MaxX then HiX:= MaxX;
          for CntX:= LoX to HiX do if Stack[CntX + (CntY * LenX)] = 1 then begin
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X = -1 then begin
        // Left: Bottom -> Top
        CntX:= ValX - i;
        if CntX > -1 then begin
          HiY:= ValY + i;
          LoY:= ValY - i;
          if LoY < 0 then LoY:= 0;
          if HiY > MaxY then HiY:= MaxY;
          for CntY:= LoY to HiY do if Stack[CntX + (CntY * LenX)] = 1 then begin
            X:= CntX;
            Y:= CntY;
            Break;
          end;
        end;
      end;

      if X <> -1 then Break; // If some of the inner loops made a hit, cancel
      // the enclosing square-iterating loop, too.
    end;


    // If nothing was found, floodfill is finished; X stays -1 and
    // thus main loop stops.
  end; // WHILE
  with UndoBounds do begin
    Inc(Bottom);
    Inc(Right);
  end;
end;

//------------------------------------------------------------------------------

procedure CleanWhole(const Target: TBitmap32; const DoDark, DoPlayer: Boolean);
var BTarget: PColor32Array;
    i, CycleLenght: Integer;
begin
  BTarget:= Target.Bits;
  with Target do CycleLenght:= Width * Height;
  if CycleLenght > 0 then for i:= 0 to CycleLenght - 1 do begin
    // Cycle through all Mask's pixels and if some are marked as "work here", check
    // for special colors.
    if (DoDark and IsDarkColor(BTarget[i])) or (DoPlayer and IsPlayerColor(BTarget[i]))
      then BTarget[i]:= ColorAdd(BTarget[i], $FF010101);
  end;
  Emms;
end;

//------------------------------------------------------------------------------

procedure CleanByMask(const Mask, Target: TBitmap32; const DoDark, DoPlayer: Boolean);
// clean special colors; Mask is a bitmap where areas to process are white
var BMask, BTarget: PColor32Array;
    i, CycleLenght: Integer;
begin
  BMask:= Mask.Bits;
  BTarget:= Target.Bits;
  with Mask do
  CycleLenght:=  Width * Height;
  if CycleLenght > 0 then for i:= 0 to CycleLenght - 1 do
    if BMask[i] = $FFFFFFFF then
      // Cycle through all Mask's pixels and if some are marked as "work here", check
      // for special colors.
      if (DoDark and IsDarkColor(BTarget[i])) or
        (DoPlayer and IsPlayerColor(BTarget[i]))
      then
        BTarget[i]:= ColorAdd(BTarget[i], $FF010101);
  Emms;
end;

//------------------------------------------------------------------------------

procedure MakeBlended(const Target: TBitmap32; const BlendWeight: Cardinal;
  const BlendColor: TColor32);
(* Blends the whole bitmap with one color and weight.
   The cycle goes from last pixel back towards first, so that there is needed no
   loop variable and loop condition is a simple pointer < base check. *)

{$IFNDEF NO_ASSEMBLY}
// asm version
var CycleLength: Integer;
    B: PColor32Array;
    BlendCl: TColor32;
begin
  CycleLength:= Target.Width * Target.Height;
  B:= Target.Bits;
  if CycleLength > 0 then begin
    BlendCl:= BlendColor or $FF000000;
    asm
      {
        eax - pixel data
        ebx - counter down to 0
        ecx - BlendWeight
        edx - BlendCl for CombineReg calls
        esi - base pointer
        edi - "persistent" copy of BlendCl
      }
      pushad
      // preserve everything for Delphi
      mov edi, BlendWeight
      and edi, $000000FF
      // Ensure value inside 0..255 - assumed in CombineReg, otherwise crash.
      // Strange results don't matter, this code is used only for background
      // so glitches are better than crashes.
      // EDI is not further modified.
      mov edx, BlendCl
      // blending color - EDX not further modified
      mov ebx, CycleLength
      dec ebx
      shl ebx, 2
      // EBX:= (CycleLength - 1) * 4
      mov esi, B
      // Bitmap's "scanline" pointer -> ESI ...
      add ebx, esi
      // ... and add to it its length-1 in dwords (32b) -> EBX points at last
      // pixel in data buffer.

      @start:
      mov eax, [ebx]
      // copy pixel value
      mov ecx, edi
      // ECX is where BlendWeight goes for calls; it gets overwritten by
      // CombineReg, renew its value every cycle
      call CombineReg
      mov [ebx], eax
      (*dec ebx
      dec ebx
      dec ebx
      dec ebx*)
      sub ebx, 4
      // move "head" pointer one pixel (32b) back
      cmp ebx, esi
      // Loop end condition is when EBX points lower than ESI = negative pixel
      // position before start of data buffer.
      jnb @start

      popad
    end;
    EMMS;
  end;
end;

{$ELSE} //......................................................................
// native version

// (const Target: TBitmap32; const BlendWeight: Cardinal;
//  const BlendColor: TColor32);
var CycleLength, i: Integer;
    B: PColor32Array;
    BlendCl: TColor32;
begin
  CycleLength:= Target.Width * Target.Height;
  B:= Target.Bits;
  if CycleLength > 0 then begin
    BlendCl:= BlendColor or $FF000000;
    for i:= 0 to CycleLength - 1 do
      B[i]:= CombineReg(B[i], BlendCl, BlendWeight);
    EMMS;
  end;
end;

{$ENDIF}

//------------------------------------------------------------------------------

procedure InvertMask(const Mask: TBitmap32);
var B: PColor32Array;
    i: Integer;
begin
  B:= Mask.Bits;
  for i:= 0 to Mask.Height * Mask.Width - 1 do begin
    if B[i] = MaskBackClr then B[i]:= PieceClr else B[i]:= MaskBackClr;
  end;
end;

//------------------------------------------------------------------------------
(*
    The following two blobs of code take a  bitmap + rectangle + options  and
    convert that to darkened or highlighted form. Thus they can't just run on
    the whole internal buffer, so a mixed approach is used: the outwardly
    visible routine "dumbly" iterates over rows (scanlines) and the hidden ones
    (and damn long too) work on subsections of these. That's where all the real
    work happens. Happy reading *grin*
*)
//------------------------------------------------------------------------------

procedure HighlightedScanline(const ATarget: PColor32Array;
  const AStart, ALength: DWord; const AOptions: PSdiEdOptions); cdecl;
{$IFNDEF NO_ASSEMBLY}
var BlendWeight: Cardinal;
    DarkCl, PlayerPriCl, PlayerSecCl, BlendCl: TColor32;
begin
  with AOptions.HighlightMode do begin
    DarkCl:= DarkHlC;
    PlayerPriCl:= PlayerPriHlC;
    PlayerSecCl:= PlayerSecHlC;
    BlendWeight:= Weight;
    BlendCl:= BlendColor or $FF000000;
  end;
  asm
    (*
    eax - pixel data
    ebx - BlendWeight
    ecx - various usage
    edx - BlendCl
    edi - loop iterator & pointer to current pixel; starts at end
    esi - pointer to buffer start
    ATarget is "absolute" pointer, all other measures are in multiples of
      SizeOf(TColor32)
    Remember: this loop in assembly runs from higher addresses to lower,
      backwards on the pixel scanline.
    *)
    pushad
    // Save everything - Delphi allows free modification of only some
    // registers but all will be used!

    // now prepare these two pointers for crawling the scanline:

    mov edi, AStart
    add edi, ALength
    shl edi, 2
    // state: edi = (length + start) * sizeof(TColor32)
    add edi, ATarget
    // state: edi = Ptr(end_of_scanline_part)

    mov esi, AStart
    shl esi, 2
    // state: esi = start * sizeof(TColor32)
    add esi, ATarget
    // state: esi = Ptr(start_of_scanline_part)

    // initialization of colours
    mov edx, BlendCl
    mov ebx, BlendWeight
    and ebx, $000000FF
    // weight is only last byte even if passed around as dword!

    // the loop itself
    @start:
    mov eax, [edi]
    // take current pixel

    and eax, $00FFFFFF
    cmp eax, $00E7FFFF
    je @loop_end
    mov eax, [edi]
    // skip background

    // night colours:
    cmp eax, $FF6B6B6B
    je @night
    cmp eax, $FF9B9B9B
    je @night
    cmp eax, $FFB3B3B3
    je @night
    cmp eax, $FFC9C9C9
    je @night
    cmp eax, $FFDFDFDF
    je @night
    cmp eax, $FF57656F
    je @night
    cmp eax, $FF7F9BF1
    je @night
    cmp eax, $FFFFFF53
    je @night
    cmp eax, $FFFF211D
    je @night
    cmp eax, $FF01DD01
    je @night
    cmp eax, $FFE3E3FF
    je @night
    cmp eax, $FFC1B1D1
    je @night
    cmp eax, $FF4D4D4D
    je @night
    cmp eax, $FFFF017F
    je @night
    cmp eax, $FF0101FF
    je @night
    jmp @test_player1

    @night:
    // ok, got night, handle as night
    mov ecx, DarkCl
    mov [edi], ecx
    jmp @loop_end

    @test_player1:
    // primary player's
    cmp eax, $FF395E7C
    je @player1
    cmp eax, $FF4C7191
    je @player1
    cmp eax, $FF6084A7
    je @player1
    cmp eax, $FF7497BD
    je @player1
    cmp eax, $FF88ABD3
    je @player1
    cmp eax, $FF9CBEE9
    je @player1
    cmp eax, $FFB0D2FF
    je @player1
    cmp eax, $FF244B67
    je @player1
    jmp @test_player2

    @player1:
    mov ecx, PlayerPriCl
    mov [edi], ecx
    jmp @loop_end

    @test_player2:
    cmp eax, $FF7B5803
    je @player2
    cmp eax, $FF8E6F04
    je @player2
    cmp eax, $FFA18605
    je @player2
    cmp eax, $FFB49D07
    je @player2
    cmp eax, $FFC6B408
    je @player2
    cmp eax, $FFD9CB0A
    je @player2
    cmp eax, $FFECE20B
    je @player2
    cmp eax, $FFFFF90D
    je @player2
    jmp @other

    @player2:
    mov ecx, PlayerSecCl
    mov [edi], ecx
    jmp @loop_end

    @other:
    // didn't hit anything, not special
    mov ecx, ebx
    call CombineReg
    // just darken
    mov [edi], eax
    jmp @loop_end

    @loop_end:
    sub edi, 4
    // step back by SizeOf(TColor32) to previous pixel
    cmp edi, esi
    jnb @start
    // repeat if start of scanline part is not reached

    popad
  end;
  EMMS;
end;
{$ELSE}
// native version
//   (const ATarget: PColor32Array; const AStart, ALength: DWord;
//   const AOptions: PSdiEdOptions)
var i, StopPtr: PColor32;
    BlendWeight: Cardinal;
    DarkCl, PlayerPriCl, PlayerSecCl, BlendCl: TColor32;
begin
  with AOptions.HighlightMode do begin
    DarkCl:= DarkHlC;
    PlayerPriCl:= PlayerPriHlC;
    PlayerSecCl:= PlayerSecHlC;
    BlendWeight:= Weight;
    BlendCl:= BlendColor or $FF000000;
  end;
  StopPtr:= PColor32(DWord(ATarget) + AStart * SizeOf(TColor32));
  i:= PColor32(DWord(StopPtr) + ALength * SizeOf(TColor32));
  while DWord(i) >= DWord(StopPtr) do begin
    if not IsBackgroundColor(i^) then begin
      // first check for background since it forms most of typical picture!
      // and skip of course and save us the hassle :-)
      if IsDarkColor(i^) then
        i^:= DarkCl
      else if IsPlayerPriColor(i^) then
        i^:= PlayerPriCl
      else if IsPlayerSecColor(i^) then
        i^:= PlayerSecCl
      else
        i^:= CombineReg(i^, BlendCl, BlendWeight);
    end;
    Dec(i); // automatically takes care of sizeof(TColor32)!
  end;
  EMMS;
end;
{$ENDIF}

procedure MakeHighlighted(const Target: TBitmap32; const Options: TSdiEdOptions;
  const Area: TRect);
var y, Width, Start: Integer;
begin
  Width:= RectWidth(Area) - 1; // last row & column do not belong to the rectangle!
  Start:= Area.Left;
  for y:= Area.Top to Area.Bottom - 1 do begin
    HighlightedScanline(Target.ScanLine[y], Start, Width, Addr(Options));
  end;
end;

//------------------------------------------------------------------------------

procedure DarkScanline(const ATarget: PColor32Array;
  const AStart, ALength: DWord; const AOptions: PSdiEdOptions); cdecl;
{$IFNDEF NO_ASSEMBLY}
var BlendWeight: Cardinal;
    BlendCl: TColor32;
begin
  with AOptions.DarkMode do begin
    BlendWeight:= Weight;
    BlendCl:= BlendColor or $FF000000;
  end;
  asm
    (*
    eax - pixel data
    ebx - BlendWeight
    ecx - various usage
    edx - BlendCl
    edi - loop iterator & pointer to current pixel
    esi - pointer to buffer start
    *)
    pushad

    mov edi, AStart
    add edi, ALength
    shl edi, 2
    // state: edi = (length + start) * sizeof(TColor32)
    add edi, ATarget
    // state: edi = Ptr(end_of_scanline_part)

    mov esi, AStart
    shl esi, 2
    // state: esi = start * sizeof(TColor32)
    add esi, ATarget
    // state: esi = Ptr(start_of_scanline_part)

    mov edx, BlendCl
    mov ebx, BlendWeight
    and ebx, $000000FF
    // initialization

    @start:
    mov eax, [edi]

    // background:
    and eax, $00FFFFFF
    cmp eax, $00E7FFFF
    je @loop_end
    mov eax, [edi]

    // nondarkening colours:
    cmp eax, $FF6B6B6B
    je @loop_end
    cmp eax, $FF9B9B9B
    je @loop_end
    cmp eax, $FFB3B3B3
    je @loop_end
    cmp eax, $FFC9C9C9
    je @loop_end
    cmp eax, $FFDFDFDF
    je @loop_end
    cmp eax, $FFFF017F
    je @loop_end
    cmp eax, $FF0101FF
    je @loop_end

    // other night colours:
    cmp eax, $FF57656F
    jne @next01
    mov [edi], $FFD6C384
    jmp @loop_end
    @next01:
    cmp eax, $FF7F9BF1
    jne @next02
    mov [edi], $FF84C3D6
    jmp @loop_end
    @next02:
    cmp eax, $FFFFFF53
    jne @next03
    mov [edi], $FFFFFF52
    jmp @loop_end
    @next03:
    cmp eax, $FFFF211D
    jne @next04
    mov [edi], $FFFF2018
    jmp @loop_end
    @next04:
    cmp eax, $FF01DD01
    jne @next05
    mov [edi], $FF00DF00
    jmp @loop_end
    @next05:
    cmp eax, $FFE3E3FF
    jne @next06
    mov [edi], $FFFFFFE7
    jmp @loop_end
    @next06:
    cmp eax, $FFC1B1D1
    jne @next07
    mov [edi], $FFD6C384
    jmp @loop_end
    @next07:
    cmp eax, $FF4D4D4D
    jne @next08
    mov [edi], $FFD6C384
    jmp @loop_end
    @next08:

    // not glowable!
    mov ecx, ebx
    call CombineReg
    mov [edi], eax
    jmp @loop_end

    @loop_end:
    sub edi, 4
    cmp edi, esi
    jnb @start

    popad
  end;
  EMMS;
end;
{$ELSE}
// native version
//   (const ATarget: PColor32Array; const AStart, ALength: DWord;
//   const AOptions: PSdiEdOptions)
var i, StopPtr: PColor32;
    BlendWeight: Cardinal;
    BlendCl: TColor32;
    // local variables should be faster
begin                                
  with AOptions.DarkMode do begin
    BlendWeight:= Weight;
    BlendCl:= BlendColor or $FF000000;
  end;
  StopPtr:= PColor32(DWord(ATarget) + AStart * SizeOf(TColor32));
  i:= PColor32(DWord(StopPtr) + ALength * SizeOf(TColor32));
  while DWord(i) >= DWord(StopPtr) do if not IsBackgroundColor(i^) then begin
    case i^ of
      $FF6B6B6B, $FF9B9B9B, $FFB3B3B3, $FFC9C9C9, $FFDFDFDF,
      $FF0101FF, $FFFF017F: ;
      // non-darkening greys and new light colors
      $FF57656F: i^:= $FFD6C384;
      $FF7F9BF1: i^:= $FF84C3D6;
      $FFFFFF53: i^:= $FFFFFF52;
      $FFFF211D: i^:= $FFFF2018;
      $FF01DD01: i^:= $FF00DF00;
      $FFE3E3FF: i^:= $FFFFFFE7;
      $FFC1B1D1: i^:= $FFD6C384;
      $FF4D4D4D: i^:= $FFD6C384;
      // changing lights
      else i^:= CombineReg(i^, BlendCl, BlendWeight);
    end;
    Dec(i); // automatically takes care of sizeof(TColor32)!
  end;
  EMMS;
end;
{$ENDIF}

procedure MakeDarkened(const Target: TBitmap32; const Options: TSdiEdOptions;
  const Area: TRect);
var y, Width, Start: Integer;
begin
  Width:= RectWidth(Area) - 1;
  Start:= Area.Left;
  for y:= Area.Top to Area.Bottom - 1 do begin
    DarkScanline(Target.ScanLine[y], Start, Width, Addr(Options));
  end;
end;

//------------------------------------------------------------------------------

end.

