Raycaster 中的高效 floor/ceiling 渲染

Efficient floor/ceiling rendering in Raycaster

我在我的 Raycaster 引擎上工作了一段时间,我在较慢的机器上运行。 我遇到的最具挑战性的问题是 was/is 高效的地板和天花板铸造。

我的问题是: 我可以使用什么其他更快的方法? (我不确定 Doom 地板和天花板是如何渲染的)

到目前为止,我尝试了两种典型的解决方案:

水平方法当然要快得多,但我另外用定点变量对其进行了优化。

不幸的是,即使这种方法也是性能杀手 - 即使在较快的 CPU 上,fps 也会下降相当大,而较慢的 CPU 则是瓶颈。

我的其他想法:

到目前为止我的进步: https://www.youtube.com/watch?v=u3zA2Wh0NB4

编辑 (1):

Floor and Ceiling rednering代码(基于od lodev tutorial the horizo​​ntal approach) 但用定点优化。 Ceil 计算反映到 floor。

https://lodev.org/cgtutor/raycasting2.html

这种方法比垂直方法快,但是很多计算是内循环的,随机访问纹理像素会影响性能..

inline void RC_Raycast_Floor_Ceiling()
{
    // get local copy of RC_level_textures
    sBM_Bitmap* level_textures = RC_level_textures;

    // ray direction for leftmost ray (x = 0) and rightmost ray (x = width)
    float32 r_dx0 = RC_pp_dx - RC_pp_nsize_x_d2;
    float32 r_dy0 = RC_pp_dy - RC_pp_nsize_y_d2;

    //float32 r_dx1 = RC_pp_dx + RC_pp_nsize_x_d2;
    //float32 r_dy1 = RC_pp_dy + RC_pp_nsize_y_d2;

    // precalculated helpers for performance
    float32 r_dx1_m_dx0_div_width = (RC_pp_nsize_x_d2 + RC_pp_nsize_x_d2) * RC_render_width__1div__f;
    float32 r_dy1_m_dy0_div_width = (RC_pp_nsize_y_d2 + RC_pp_nsize_y_d2) * RC_render_width__1div__f;

    int16 ray_y = -1;
    u_int16 ray_y_counter = RC_render_height__i;

    u_int8* walls_buffer__ptr = RC_walls_buffer;

    // casting floor and ceiling - horizontal line by line - from left to right
    while(ray_y_counter)
    {
        ray_y++;
        ray_y_counter--;

        // dont go further if the current floor/ceil scanline line won't be visible
        if (ray_y >= RC_walls_start)
        {
            break;
            if (ray_y < RC_walls_end)
            {
                ray_y = RC_walls_end;
                ray_y_counter = RC_walls_start - 1;
                walls_buffer__ptr += ((RC_walls_end - RC_walls_start) * RC_render_width__i);
                continue;
            }
        }
      
        // whether this section is floor or ceiling
        u_int8 is_floor = ray_y > RC_render_height__div2__i;

        // current ray y position compared to the center of the screen (the horizon)
        float32 ry_pos = (float32)(is_floor ? (ray_y - RC_render_height__div2__i) : (RC_render_height__div2__i - ray_y));

        // vertical position of projection plane, 0.5 is between floor and ceiling
        float32 pp_z = (float32)(is_floor ? (RC_render_height__div2__i) : (RC_render_height__div2__i));

        float32 straight_distance_to_point = pp_z / ry_pos;

        // calculate the real world step vector we have to add for each x (parallel to camera plane)
        // adding step by step avoids multiplications with a weight in the inner loop
        float32 floor_step_x = straight_distance_to_point * r_dx1_m_dx0_div_width;
        float32 floor_step_y = straight_distance_to_point * r_dy1_m_dy0_div_width;

        float32 floor_x = RC_player_x + straight_distance_to_point * r_dx0;
        float32 floor_y = RC_player_y + straight_distance_to_point * r_dy0;

        // convert that values to fixed point
        int32 floor_x__fp = (int32)(floor_x * 65536.0f);
        int32 floor_y__fp = (int32)(floor_y * 65536.0f);

        int32 floor_step_x__fp = (int32)(floor_step_x * 65536.0f);
        int32 floor_step_y__fp = (int32)(floor_step_y * 65536.0f);

        u_int32 ry_m_render_width = ray_y * RC_render_width__i;
        u_int32 ry_m_render_width_i_mirror = (RC_render_height__i- ray_y-1) * RC_render_width__i;

        int16 ray_x = -1;
        u_int16 ray_x_counter = RC_render_width__i;

        sLV_Cell* level_map = RC_level->map;

        // drawing floor and ceiling from left to right
        while(ray_x_counter)
        {
            ray_x++;
            ray_x_counter--;

            floor_x__fp += floor_step_x__fp;
            floor_y__fp += floor_step_y__fp;

            if (*walls_buffer__ptr != 0)
            {
                walls_buffer__ptr++;
                continue;
            }

            walls_buffer__ptr++;

            u_int32 output_pixel_index = ray_x + ry_m_render_width;
            u_int32 output_pixel_index_mirror = ray_x + ry_m_render_width_i_mirror;

            // the cell coord is simply got from the integer parts of floorX and floorY

            u_int32 curr_cell_x = (floor_x__fp & FP_INTEGER_MASK_16) >> 16;
            u_int32 curr_cell_y = (floor_y__fp & FP_INTEGER_MASK_16) >> 16;

            // prevent overflow
           // curr_cell_x &= LV_MAP_SIZE_m1;
           // curr_cell_y &= LV_MAP_SIZE_m1;

            u_int32 texture_pixel_x = (floor_x__fp & FP_FRACTION_MASK_16) >> 9;
            u_int32 texture_pixel_y = (floor_y__fp & FP_FRACTION_MASK_16) >> 9;

            u_int32 cell_index = curr_cell_x + (curr_cell_y << LV_MAP_SIZE_BITSHIFT);

            // get the texture index depending on the cell
            u_int32 texture_index;

        /*    if (is_floor)
            {
               
                texture_index = level_map[cell_index].floor_id;
            }
            else
            {
                texture_index = level_map[cell_index].ceil_id;
            }*/

            texture_index = level_map[cell_index].ceil_id;

            // get pixel coords in texture
            u_int32 tex_index = texture_pixel_x + (texture_pixel_y << 7);
            u_int32 texture_current_pixel = level_textures[0].pixels[tex_index];

            RC_output_buffer_32[output_pixel_index] = texture_current_pixel;

            texture_index = level_map[cell_index].floor_id;
            texture_current_pixel = level_textures[texture_index].pixels[tex_index];

            RC_output_buffer_32[output_pixel_index_mirror] = texture_current_pixel;
        }
    }
}

I will refer my ray cast engine so here some stuff that will help you understand it. Lets start with class declarations:

const int   _Doom3D_mipmaps=8;          // number of mipmaps for texture
const DWORD _Doom3D_edit_cell_size=4;   // [pixel] 2D map cell size
const DWORD _Doom3D_cell_size=100;      // [units] cell cube size
//                                              heigh     ceil   floor  wall
const DWORD _Doom3D_cell_floor=(                0<<24)|( 0<<16)|(18<<8)|(39);
const DWORD _Doom3D_cell_wall =(_Doom3D_cell_size<<24)|( 0<<16)|( 0<<8)|(39);
class Texture2D // 2D textures
    {
public:
    Graphics::TBitmap *bmp;
    int xs,ys; DWORD **pyx;

    Texture2D() { bmp=new Graphics::TBitmap; xs=0; ys=0; pyx=NULL; resize(1,1); }
    Texture2D(Texture2D& a) { *this=a; }
    ~Texture2D() { free(); if (bmp) delete bmp; bmp=NULL; }
    Texture2D* operator = (const Texture2D *a) { *this=*a; return this; }
    Texture2D* operator = (const Texture2D &a) { bmp->Assign(a.bmp); resize(); return this; }

    void free() { if (pyx) delete[] pyx; xs=0; ys=0; pyx=NULL; }
    void resize(int _xs=-1,int _ys=-1) { free(); if ((_xs<0)||(_ys<0)) { _xs=bmp->Width; _ys=bmp->Height; } else bmp->SetSize(_xs,_ys); bmp->HandleType=bmDIB; bmp->PixelFormat=pf32bit; pyx=new DWORD* [_ys]; for (int y=0;y<_ys;y++) pyx[y]=(DWORD*)bmp->ScanLine[y]; xs=bmp->Width; ys=bmp->Height; }
    void load(AnsiString name) { AnsiString ext=ExtractFileExt(name).LowerCase(); for(;;) { if (ext==".bmp") { bmp->LoadFromFile(name); break; } if (ext==".jpg") { TJPEGImage *jpg=new TJPEGImage; if (jpg==NULL) return; jpg->LoadFromFile(name); bmp->Assign(jpg); delete jpg; break; } return; } resize(); }
    };
class Doom3D
    {
public:
    Texture2D map,map2,scr,sky;         // map, map preview, screen, sky texture
    Texture2D txr[_Doom3D_mipmaps],*ptxr;// texture atlas with mipmas, actual mipmap for rendering scan lines
    DWORD sxs2,sys2;                    // screen half resolution
    DWORD tn,tm;                        // number of textures in texture atlas, number of mipmaps used
    BYTE liH[256],liV[256],liF[256];    // shading LUT for H,V,floor/ceiling (light scaled shades of gray)

    int scale_x;
    bool _no_mipmap;

    struct _player
        {
        double  a;                      // player view direction [rad]
        double x0,y0,z0;                // old player position [cell]
        double  x, y, z;                // player position [cell]
        double vx,vy,vz;                // player speed [cell/s]
        double ax,ay,az;                // player acceleration [cell/s]
        void update(double dt)
            {
            x0=x; vx+=ax*dt; x+=vx*dt;
            y0=y; vy+=ay*dt; y+=vy*dt;
            z0=z; vz+=az*dt; z+=vz*dt;
            }
        _player() { a=x=y=z=vx=vy=vz=ax=ay=az=0.0; }
        _player(_player& a) { *this=a; }
        ~_player() {};
        _player* operator = (const _player *a) { *this=*a; return this; }
        //_player* operator = (const _player &a) { ..copy... return this; }
        } plr;
    double view_ang;                    // [rad] view angle
    double focus;                       // [cells] view focal length
    double wall;                        // [px] projected wall size ratio size = height*wall/distance
    struct _ray
        {
        int    x,y;                     // cell map position
        double ang;                     // ray angle
        double x0,y0,l0;                // cell first hit
        double x1,y1,l1;                // cell second hit
        int    sx;                      // screen x coordinate
        int    sy0,sy1;                 // screen y coordinates of V scanline to render
        char tp0,tp1;                   // H/V hit type
        DWORD map;                      // map cell of hit or 0xFFFFFFFF
        _ray() {};
        _ray(_ray& a)   { *this=a; }
        ~_ray() {};
        _ray* operator = (const _ray *a) { *this=*a; return this; }
        //_ray* operator = (const _ray &a) { ..copy... return this; }
        };

    keytab keys;                        // keyboard handler
    DWORD txr_wall;                     // map editor
    DWORD txr_floor;
    DWORD txr_ceil;
    DWORD cell_h;
    DWORD *txr_mode; AnsiString txr_mode_txt;

    Doom3D();
    Doom3D(Doom3D& a)   { *this=a; }
    ~Doom3D();
    Doom3D* operator = (const Doom3D *a) { *this=*a; return this; }
    //Doom3D* operator = (const Doom3D &a) { ..copy... return this; }

    void map_resize(DWORD xs,DWORD ys); // change map resolution
    void map_clear();                   // clear whole map
    void map_save(AnsiString name);     // save map to file
    void map_load(AnsiString name);     // load map from file
    void scr_resize(DWORD xs,DWORD ys); // resize view
    void txr_mipmap();                  // generate mipmaps for txr

    bool cell2screen(int &sx,int &sy,double x,double y,double z);   // [pixel] <- [cell] return tru if in front of player
    void draw_scanline(int sx,int sy0,int sy1,                int symin,int tx0,int ty0,int tx1,int ty1,BYTE *li); // render screen y-scan line from texture (sub routine)
    void draw_scanline(int sx,int sy0,int sy1,int sz0,int sz1,int symin,int tx0,int ty0,int tx1,int ty1,BYTE *li); // render screen y-scan line from texture (sub routine)
    void draw_cell(_ray &p);            // render actual cell hit by ray p (sub routine)
    void draw();                        // render view
    void update(double dt);             // update game logic (call in timer with interval dt [s])
    void mouse(double x,double y,TShiftState sh)    // editor mouse handler
        {
        keys.setm(x,y,sh);  // mx,my mouse screen pos [pixels]
        x=floor(x/_Doom3D_edit_cell_size); //if (x>=map.xs) x=map.xs-1; if (x<0) x=0;
        y=floor(y/_Doom3D_edit_cell_size); //if (y>=map.ys) y=map.ys-1; if (y<0) y=0;
        DWORD xx=x,yy=y;
        if ((xx>=0)&&(xx<map.xs)&&(yy>=0)&&(yy<map.ys)) keys.setk(xx,yy,sh); // kx,ky mouse map pos [cells]
        xx=keys.kx; yy=keys.ky;
        if (keys.Shift.Contains(ssLeft  )) map.pyx[yy][xx]=(txr_wall)|(txr_floor<<8)|(txr_ceil<<16)|(cell_h<<24);
        if (keys.Shift.Contains(ssRight )) map.pyx[yy][xx]=0xFFFFFFFF;
        if (keys.Shift.Contains(ssMiddle))
            {
            DWORD c=map.pyx[yy][xx];
            txr_wall  =c     &0xFF;
            txr_floor=(c>> 8)&0xFF;
            txr_ceil =(c>>16)&0xFF;
            cell_h   =(c>>24)&0xFF;
            }
        keys.rfsmouse();
        }
    void wheel(int delta,TShiftState sh)    // editor mouse wheel handler
        {
        if (sh.Contains(ssShift))
            {
            if (delta<0) { cell_h-=10; if (cell_h>_Doom3D_cell_size) cell_h=0; }
            if (delta>0) { cell_h+=10; if (cell_h>_Doom3D_cell_size) cell_h=_Doom3D_cell_size; }
            }
        else{
            if (delta<0) { (*txr_mode)--; if (*txr_mode>=tn) *txr_mode=tn-1; }
            if (delta>0) { (*txr_mode)++; if (*txr_mode==tn) *txr_mode=   0; }
            }
        }
    };

The full code (without any helper files) is ~27.7 KByte so it would not fit with answer. Its old version (without floor/ceiling/top of walls) is here:

Its more complicated than yours as it allows walls with different heights and separate texture for wall, floor and ceiling (however ceiling is not implemented yet I used cloud texture instead) and also jumps (z position of player).

The geometry of map and projections are the same as in the link above. Here a helper function that convert between 2.5D map position and screen (so you know what math is used):

bool Doom3D::cell2screen(int &sx,int &sy,double x,double y,double z)
    {
    double a,l;
    // x,y relative to player
    x-=plr.x;
    y-=plr.y;
    // convert z from [cell] to units
    z*=_Doom3D_cell_size;
    // angle -> sx
    a=atanxy(x,y)-plr.a;
    if (a<-pi) a+=pi2;
    if (a>+pi) a-=pi2;
    sx=double(sxs2)*(1.0+(2.0*a/view_ang));
    // perpendicular distance -> sy
    l=sqrt((x*x)+(y*y))*cos(a);
    sy=sys2+divide((double((2.0*plr.z+1.0)*_Doom3D_cell_size)-z-z)*wall,l);
    // in front of player?
    return (fabs(a)<=0.5*pi);
    }

The function itself is used only for HUD of editor (highlight selected map cell in 2.5D view) the sxs2,sys2 is the center of screen (half of resolution sxs,sys) and wall is used to manage the perspective projection computed like this:

void Doom3D::scr_resize(DWORD xs,DWORD ys)
    {
    scr.resize(xs,ys);
    sxs2=scr.xs>>1;
    sys2=scr.ys>>1;
    // aspect ratio,view angle corrections
    double a=90.0*deg-view_ang;
    wall=double(scr.xs)*(1.25+(0.288*a)+(2.04*a*a))*focus/double(_Doom3D_cell_size);
    }

In my engine the ray is tested in more like ray march manner as it does not stop on first hit, but on first hit that covers whole wall size (so rays can pass through smaller walls that do not block whole view which also render the ground tiles along the way).

As your engine does not do this and have full size walls only you will have just single hit instead.

Now finally get back to your question. I see 2 ways of improvement:

  1. use result of the ray from wall test instead of casting floor/ceiling ray

    As you want to have separate tiles on ceiling and floor then you should use ray marching like ray cast. Meaning cast one ray per each column of screen and iterate it by all map cell crossings until a wall is hit. However instead of rendering only on wall hit you have to render on each cell hit. Something like on this image:

    So red line is cast ray (orange is just a mirror). Each rendered cell of map is hit with the ray in 2 points. You should know the map cell position of each hit and also its screen coordinate from the ray casting. So you just need to add the perpendicular distance to camera and render the line segment as a perspective correct interpolated textured line for floor and ceiling. The wall is always just vertical non perspective textured line. The texture coordinates are taken from the map position of hit (fractional part of coordinates). In code its a bit messy but here it is:

     void Doom3D::draw_scanline(int sx,int sy0,int sy1,int symin,int tx0,int ty0,int tx1,int ty1,BYTE *li)
         {
         // affine texture mapping (front side of walls) sy0>sy1
         union { DWORD dd; BYTE db[4]; } cc;
         int sy,tx,ty,ktx,kty,dtx,dty,ctx,cty,dsy;
                dsy=sy1-sy0; if (dsy<0)                        dsy=-dsy;
         ktx=0; dtx=tx1-tx0; if (dtx>0) ktx=+1; else { ktx=-1; dtx=-dtx; } tx=tx0; ctx=0;
         kty=0; dty=ty1-ty0; if (dty>0) kty=+1; else { kty=-1; dty=-dty; } ty=ty0; cty=0;
         if (dsy) for (sy=sy0;sy>=sy1;sy--)
             {
             if ((sy>=0)&&(sy<scr.ys)&&(sy<=symin))
              if ((tx>=0)&&(tx<ptxr->xs)&&(ty>=0)&&(ty<ptxr->ys))
                 {
                 cc.dd=ptxr->pyx[ty][tx];
                 cc.db[0]=li[cc.db[0]];
                 cc.db[1]=li[cc.db[1]];
                 cc.db[2]=li[cc.db[2]];
                 scr.pyx[sy][sx]=cc.dd;
                 }
             for (ctx+=dtx;ctx>=dsy;) { ctx-=dsy; tx+=ktx; }
             for (cty+=dty;cty>=dsy;) { cty-=dsy; ty+=kty; }
             }
         }
     void Doom3D::draw_scanline(int sx,int sy0,int sy1,int sz0,int sz1,int symin,int tx0,int ty0,int tx1,int ty1,BYTE *li)
         {
         // perspective correct mapping (floor, top side of walls, ceiling) sy0>sy1
         union { DWORD dd; BYTE db[4]; } cc;
         int sy,tx,ty,dsy,dtx,dty,n,dn;
         int a,_z0,_z1,_tx;
         const int acc0=16;
         const int acc1=8;
         _tx=tx0-(tx0%ptxr->ys);
         tx0-=_tx;
         tx1-=_tx;
         dsy=sy1-sy0; dn=abs(dsy);
         dtx=tx1-tx0;
         dty=ty1-ty0;
         if (sz0==0) return; _z0=(1<<acc0)/sz0;
         if (sz1==0) return; _z1=(1<<acc0)/sz1;
         if (dn) for (n=0;n<=dn;n++)
             {
             sy=sy0+((n*dsy)/dn);
             a=((n<<acc1)*_z1)/(((dn-n)*_z0)+(n*_z1)); // perspective correction a=<0,1<<acc1> (https://en.wikipedia.org/wiki/Texture_mapping)
             tx=tx0+((a*dtx)>>acc1)+_tx;
             ty=ty0+((a*dty)>>acc1);
             if ((sy>=0)&&(sy<scr.ys)&&(sy<=symin))
              if ((tx>=0)&&(tx<ptxr->xs)&&(ty>=0)&&(ty<ptxr->ys))
                 {
                 cc.dd=ptxr->pyx[ty][tx];
                 cc.db[0]=li[cc.db[0]];
                 cc.db[1]=li[cc.db[1]];
                 cc.db[2]=li[cc.db[2]];
                 scr.pyx[sy][sx]=cc.dd;
                 }
             }
         }
     void Doom3D::draw_cell(_ray &p)
         {
         BYTE *li;
         DWORD m;
         int tx0,tx1,ty0,ty1,sy,sy0,sy1,sy2,sy3,sz0,sz1,q;
         int sy4,sy5;
         //sy0>=sy1
         sy0=sys2+divide(double((1.0+2.0*plr.z)*_Doom3D_cell_size)*wall,p.l0);
         sy1=sy0 -divide(double((p.map>>24)<<1                   )*wall,p.l0);
         sy2=sys2+divide(double((1.0+2.0*plr.z)*_Doom3D_cell_size)*wall,p.l1);
         sy3=sy2 -divide(double((p.map>>24)<<1                   )*wall,p.l1);
         sy4=sys2-divide(double((1.0-2.0*plr.z)*_Doom3D_cell_size)*wall,p.l1);
         sy5=sys2-divide(double((1.0-2.0*plr.z)*_Doom3D_cell_size)*wall,p.l0);
         sz0=double(p.l0*_Doom3D_cell_size);
         sz1=double(p.l1*_Doom3D_cell_size);
         // select mipmap resolution
         ty0=divide(double(_Doom3D_cell_size<<1)*wall,p.l0);
         for (q=tm-1;q>=0;q--)
             {
             ptxr=txr+q;
             if (ty0<=ptxr->ys) break;
             }
         if (_no_mipmap) ptxr=txr;
         // mouse select
         if (p.sx==round(keys.mx))
          if (keys.my>=sy3)
           if (keys.my<=sy0)
            if ((keys.my>=map2.ys)||(keys.mx>=map2.xs))
             {
             keys.kx=p.x;
             keys.ky=p.y;
             }
         if ((p.map&0xFF)==0xFF) { sy1=sy0; sy3=sy2; }
         // wall
         if ((sy1<p.sy1)&&((p.map&0xFF)!=0xFF))
             {
             tx0=ptxr->ys*(p.map&0xFF);
             if (p.tp0=='H') { li=liH; tx0+=double(double(ptxr->ys-1)*(p.x0-floor(p.x0))); }
             if (p.tp0=='V') { li=liV; tx0+=double(double(ptxr->ys-1)*(p.y0-floor(p.y0))); }
             draw_scanline(p.sx,sy0,sy1,p.sy1,tx0,0,tx0,((p.map>>24)*(ptxr->ys-1))/_Doom3D_cell_size,li);
             p.sy1=sy1;
             }
         // ceiling
         if ((p.map&0xFF0000)!=0xFF0000)
             {
             q=ptxr->ys*((p.map>>16)&0xFF);
             tx0=double(double(ptxr->ys-1)*(p.x0-double(p.x)))+q;
             ty0=double(double(ptxr->ys-1)*(p.y0-double(p.y)));
             tx1=double(double(ptxr->ys-1)*(p.x1-double(p.x)))+q;
             ty1=double(double(ptxr->ys-1)*(p.y1-double(p.y)));
             draw_scanline(p.sx,sy5,sy4,sz0,sz1,p.sy1,tx0,ty0,tx1,ty1,liF);
             }
         // floor/top side
         if ((sy3<p.sy1)&&((p.map&0xFF00)!=0xFF00))
             {
             q=ptxr->ys*((p.map>>8)&0xFF);
             tx0=double(double(ptxr->ys-1)*(p.x0-double(p.x)))+q;
             ty0=double(double(ptxr->ys-1)*(p.y0-double(p.y)));
             tx1=double(double(ptxr->ys-1)*(p.x1-double(p.x)))+q;
             ty1=double(double(ptxr->ys-1)*(p.y1-double(p.y)));
             draw_scanline(p.sx,sy1,sy3,sz0,sz1,p.sy1,tx0,ty0,tx1,ty1,liF);
             p.sy1=sy3;
             }
         if (sy3<p.sy1) p.sy1=sy3;
         }
     void Doom3D::draw()
         {
         tbeg();
         _ray p;
         DWORD x,y,c,m;
         DWORD mx,mx0,mx1;
         DWORD my,my0,my1;
         double a,a0,da,dx,dy,l;
         double xx0,yy0,dx0,dy0,ll0,dl0;
         double xx1,yy1,dx1,dy1,ll1,dl1;
    
         // compute diffuse + ambient lighting LUT (light scaled shades of gray)
         c=155.0+fabs(100.0*sin(   plr.a)); for (x=0;x<256;x++) liH[x]=(x*c)>>8; // H wall
         c=155.0+fabs(100.0*cos(   plr.a)); for (x=0;x<256;x++) liV[x]=(x*c)>>8; // V wall
         c=155.0+fabs(100.0*cos(30.0*deg)); for (x=0;x<256;x++) liF[x]=(x*c)>>8; // floor, wall top side
    
         // [2D map]
         m=_Doom3D_edit_cell_size;
         for (my0=0,my1=m,y=0;y<map.ys;y++,my0=my1,my1+=m)   // map.pyx[][]
          for (mx0=0,mx1=m,x=0;x<map.xs;x++,mx0=mx1,mx1+=m)
             {
             c=0x00010101*((0x40+(0x40*(map.pyx[y][x]>>24)))/_Doom3D_cell_size);
             for (my=my0;my<my1;my++)
              for (mx=mx0;mx<mx1;mx++)
               map2.pyx[my][mx]=c;
             }
         c=0x00202020;                                       // map grid
         for (y=0;y<map2.ys;y+=m) for (x=0;x<map2.xs;x++) map2.pyx[y][x]=c;
         for (x=0;x<map2.xs;x+=m) for (y=0;y<map2.ys;y++) map2.pyx[y][x]=c;
         x=keys.kx*m;                                        // selected cell
         y=keys.ky*m;
         map2.bmp->Canvas->Pen->Color=0x0020FFFF;
         map2.bmp->Canvas->MoveTo(x  ,y  );
         map2.bmp->Canvas->LineTo(x+m,y  );
         map2.bmp->Canvas->LineTo(x+m,y+m);
         map2.bmp->Canvas->LineTo(x  ,y+m);
         map2.bmp->Canvas->LineTo(x  ,y  );
         map2.bmp->Canvas->Pen->Mode=pmMerge;
    
         // [cast rays]
         a0=plr.a-(0.5*view_ang);
         da=divide(view_ang,scr.xs-1);
         da*=scale_x;
         for (a=a0,x=0;x<scr.xs;x+=scale_x,a+=da)
             {
             // grid V-line hits
             ll0=1.0e20; dl0=0.0; dx0=cos(a); const char tp0='V';
             if (dx0<0.0) { xx0=floor(plr.x); dx0=-1.0; }
             if (dx0>0.0) { xx0=ceil (plr.x); dx0=+1.0; }
             if (fabs(dx0)>1e-6) { dy0=tan(a); yy0=plr.y+((xx0-plr.x)*dy0);             dy0*=dx0; dx=xx0-plr.x; dy=yy0-plr.y; ll0=sqrt((dx*dx)+(dy*dy)); dl0=sqrt((dx0*dx0)+(dy0*dy0)); }
             // grid H-line hits
             ll1=1.0e20; dl1=0.0; dy1=sin(a); const char tp1='H';
             if (dy1<0.0) { yy1=floor(plr.y); dy1=-1.0; }
             if (dy1>0.0) { yy1=ceil (plr.y); dy1=+1.0; }
             if (fabs(dy1)>1e-6) { dx1=divide(1.0,tan(a)); xx1=plr.x+((yy1-plr.y)*dx1); dx1*=dy1; dx=xx1-plr.x; dy=yy1-plr.y; ll1=sqrt((dx*dx)+(dy*dy)); dl1=sqrt((dx1*dx1)+(dy1*dy1)); }
             p.ang=a;
             p.sx =x;
             p.sy0=scr.ys;
             p.sy1=scr.ys;
             // first hit
             if (ll0<ll1){ p.tp0=tp0; p.x0=xx0; p.y0=yy0; p.l0=ll0; xx0+=dx0; yy0+=dy0; ll0+=dl0; }
              else       { p.tp0=tp1; p.x0=xx1; p.y0=yy1; p.l0=ll1; xx1+=dx1; yy1+=dy1; ll1+=dl1; }
             p.l0*=cos(p.ang-plr.a); // anti fish eye
             p.map=0xFFFFFFFF; p.x=p.x0; p.y=p.y0;
             for (;;)
                 {
                 // closest hit
                 if (ll0<ll1) { p.tp1=tp0; p.x1=xx0; p.y1=yy0; p.l1=ll0; xx0+=dx0; yy0+=dy0; ll0+=dl0; }
                  else        { p.tp1=tp1; p.x1=xx1; p.y1=yy1; p.l1=ll1; xx1+=dx1; yy1+=dy1; ll1+=dl1; }
                 p.x=floor(0.5*(p.x0+p.x1)); // actaul cell position
                 p.y=floor(0.5*(p.y0+p.y1));
                 p.l1*=cos(p.ang-plr.a);     // anti fish eye
                 // edge of map crossed?
                 if ((p.x>=0)&&(p.x<map.xs)&&(p.y>=0)&&(p.y<map.ys)) p.map=map.pyx[p.y][p.x]; else break;
                 // render
                 draw_cell(p);               // scan line
                 if (p.sy1<=0) break;        // scan line reached top of screen
                 // prepare next cell position
                 p.tp0=p.tp1; p.x0=p.x1; p.y0=p.y1; p.l0=p.l1;
                 }
             // copy skiped scan lines
             for (mx=1;mx<scale_x;mx++)
              if (x+mx<scr.xs)
               for (y=0;y<scr.ys;y++)
                scr.pyx[y][x+mx]=scr.pyx[y][x];
             // render map ray
             if (x==sxs2)                   map2.bmp->Canvas->Pen->Color=0x000000FF;
             if ((x==0)||(x==sxs2+scale_x)) map2.bmp->Canvas->Pen->Color=0x00002020;
             map2.bmp->Canvas->MoveTo(plr.x*m,plr.y*m);
             map2.bmp->Canvas->LineTo(p.x1*m,p.y1*m);
             }
         map2.bmp->Canvas->Pen->Mode=pmCopy;
         map2.bmp->Canvas->Pen->Color=0x000000FF;
         map2.bmp->Canvas->Brush->Color=0x000000FF;
         c=focus*m;
         map2.bmp->Canvas->Ellipse(plr.x*m-c,plr.y*m-c,plr.x*m+c,plr.y*m+c);
         scr.bmp->Canvas->Draw(0,0,map2.bmp);
         // ... here HUD and info texts continues I skipped it to keep this simple
         }
    
  2. render floor and ceiling without per pixel of row/col raycast

    Simple ray casters do use non textured floor/ceiling which makes this simple just render half of screen with sky and the other with ground color before rendering the walls (or after it if rendered walls start and end is remembered):

    something like this in code:

     int x,y,sxs=sxs2<<1,sys=sys2<<1;
     // simple color sky/ceiling
     for (y=0;y<sys2;y++)
      for (x=0;x<sxs;x++)
       scr.pyx[y][x]=0x000080FF;
      for (y=sys2;y<sys;y++)
       for (x=0;x<sxs;x++)
        scr.pyx[y][x]=0x00404040;
    

    To make this more nice usually for outdoor parts of map sky texture is added that covers the ceiling. It does not move with player just rotates. So you can map a sky textured quad to upper half of view that just rotates with -plr.a Here overview of the geometry:

    The bigger radius R is half of texture resolution and the smaller I empirically compute by r=R*sin(0.5*view_ang) as its looking best to me (however the true value should be computed from screen aspect ratio and perspective focal length and view_ang).

    Here some code for this:

         const int x0=0,x1=sxs2<<1,y0=0,y1=sys2,y2=sys2<<1;
         int sx[4]={x0,x0,x1,x1},
             sy[4]={y0,y1,y1,y0},
             tx[4],ty[4],dx,dy;
         float a,r,R;
         R=sky.xs>>1;            // sky texture inscribed circle radius
         r=R*sin(0.5*view_ang);  // smaller radius (visible portion of sky)
         dx=sky.xs>>1;           // mid of sky texture
         dy=sky.ys>>1;
         a=plr.a-(0.5*view_ang);
         tx[0]=float(R*cos(a))+dx;
         ty[0]=float(R*sin(a))+dy;
         a=plr.a-(0.5*view_ang);
         tx[1]=float(r*cos(a))+dx;
         ty[1]=float(r*sin(a))+dy;
         a=plr.a+(0.5*view_ang);
         tx[2]=float(r*cos(a))+dx;
         ty[2]=float(r*sin(a))+dy;
         a=plr.a+(0.5*view_ang);
         tx[3]=float(R*cos(a))+dx;
         ty[3]=float(R*sin(a))+dy;
         polygon2D(scr,sky,sx,sy,tx,ty,4);
    

    The ground (and indoor ceiling) can be done similarly but the radius R must be a fraction of whole texture. The player position in map must be scaled to texture half size - R and added to the texture coordinates. However the texture resolution must be big enough otherwise it will not look as good (Ideally the empty space should match the resolution of your map size * wall texture resolution... So if R is half the empty space will be also R then its done like this:

         const int x0=0,x1=sxs2<<1,y0=0,y1=sys2,y2=sys2<<1;
         int sx[4]={x0,x0,x1,x1},
             sy[4]={y1,y2,y2,y1},
             tx[4],ty[4],i,dx,dy,dr;
         float a,r,R;
         R=sky.xs>>2;            // sky texture inscribed circle radius /2 so empty space is also R
         r=R*sin(0.5*view_ang);  // smaller radius (visible portion of sky)
         dx=sky.xs>>1;           // mid of sky texture
         dy=sky.ys>>1;
         a=float(R)/float(map.xs);   // add player position skaled to empty space
         dx+=float(plr.x*a);
         dy+=float(plr.y*a);
         a=plr.a-(0.5*view_ang);
         tx[0]=float(R*cos(a))+dx;
         ty[0]=float(R*sin(a))+dy;
         a=plr.a-(0.5*view_ang);
         tx[1]=float(r*cos(a))+dx;
         ty[1]=float(r*sin(a))+dy;
         a=plr.a+(0.5*view_ang);
         tx[2]=float(r*cos(a))+dx;
         ty[2]=float(r*sin(a))+dy;
         a=plr.a+(0.5*view_ang);
         tx[3]=float(R*cos(a))+dx;
         ty[3]=float(R*sin(a))+dy;
         polygon2D(scr,sky,sx,sy,tx,ty,4);
    

    In case you want to use smaller textures then your polygon rendering must be capable of handling texture coordinates like GL_REPEAT in OpenGL. The function polygon2D(scr,sky,sx,sy,tx,ty,4) is just simple/ugly/slow/unoptimized 2D textured polygon render I bustled yesterday just to test this (as I did not want to mess my optimized rendering routines for #1 methods which support only scan lines instead of polygons anyway) where sx,sy are array of screen coordinates, tx,ty are arrays f texture coordinates, 4 is number of vertexes and scr,txr are target and source textures. The code is just a port of this without the shadings and SSD1306 related stuff. Here full code:

     const int ys_max=1024;
     int bufl_vx[ys_max],bufr_vx[ys_max];
     int bufl_tx[ys_max],bufr_tx[ys_max];
     int bufl_ty[ys_max],bufr_ty[ys_max];
     void _fill2D_line(Texture2D &scr,Texture2D &txr,int vx0,int vy0,int tx0,int ty0,int vx1,int vy1,int tx1,int ty1)
         {
         int *bvx,*btx,*bty;
         int i,n,cvx,cvy,ctx,cty,svx,svy,stx,sty;
         // target buffer depend on y direction (before point ordering)
         if (vy0<vy1){ bvx=bufl_vx; btx=bufl_tx; bty=bufl_ty; }
          else       { bvx=bufr_vx; btx=bufr_tx; bty=bufr_ty; }
         // order points so joined edges are interpolated the same way
         if (vx0>vx1)
             {
             i=vx0; vx0=vx1; vx1=i;
             i=vy0; vy0=vy1; vy1=i;
             i=tx0; tx0=tx1; tx1=i;
             i=ty0; ty0=ty1; ty1=i;
             }
         // line DDA parameters
         vx1-=vx0; svx=0; if (vx1>0) svx=+1; if (vx1<0) { svx=-1; vx1=-vx1; } if (vx1) vx1++;            n=vx1;
         vy1-=vy0; svy=0; if (vy1>0) svy=+1; if (vy1<0) { svy=-1; vy1=-vy1; } if (vy1) vy1++; if (n<vy1) n=vy1;
         tx1-=tx0; stx=0; if (tx1>0) stx=+1; if (tx1<0) { stx=-1; tx1=-tx1; } if (tx1) tx1++; if (n<tx1) n=tx1;
         ty1-=ty0; sty=0; if (ty1>0) sty=+1; if (ty1<0) { sty=-1; ty1=-ty1; } if (ty1) ty1++; if (n<ty1) n=ty1;
         // single pixel (not a line)
         if (!n)
             {
             if ((vy0>=0)&&(vy0<scr.ys))
                 {
                 bufl_vx[vy0]=vx0; bufl_tx[vy0]=tx0; bufl_ty[vy0]=ty0;
                 bufr_vx[vy0]=vx0; bufr_tx[vy0]=tx0; bufr_ty[vy0]=ty0;
                 }
             return;
             }
         // horizontal line
         if (svy==0) return;
         // ND DDA algo i is parameter
         for (cvx=cvy=ctx=cty=n,i=0;;)
             {
             if ((vy0>=0)&&(vy0<scr.ys)){ bvx[vy0]=vx0; btx[vy0]=tx0; bty[vy0]=ty0; }
             i++; if (i>=n) break;
             cvx-=vx1; if (cvx<=0){ cvx+=n; vx0+=svx; }
             cvy-=vy1; if (cvy<=0){ cvy+=n; vy0+=svy; }
             ctx-=tx1; if (ctx<=0){ ctx+=n; tx0+=stx; }
             cty-=ty1; if (cty<=0){ cty+=n; ty0+=sty; }
             }
         }
     void _fill2D(Texture2D &scr,Texture2D &txr,int Y0,int Y1)
         {
         int vx0,vx1,tx0,tx1,ty0,ty1;
         int vy,i,n,cvx,ctx,cty,svx,stx,sty;
         // fill horizontal lines
         for (vy=Y0;vy<=Y1;vy++)
             {
             // horizontal line to render
             vx0=bufl_vx[vy]; tx0=bufl_tx[vy]; ty0=bufl_ty[vy];
             vx1=bufr_vx[vy]; tx1=bufr_tx[vy]; ty1=bufr_ty[vy];
             if ((vx0<      0)||(vx1<      0)) continue;
             if ((vx0<      0)&&(vx1<      0)) continue;
             if ((vx0>=scr.xs)&&(vx1>=scr.xs)) continue;
             // line DDA parameters
             vx1-=vx0; svx=0; if (vx1>0) svx=+1; if (vx1<0) { svx=-1; vx1=-vx1; } if (vx1) vx1++;            n=vx1;
             tx1-=tx0; stx=0; if (tx1>0) stx=+1; if (tx1<0) { stx=-1; tx1=-tx1; } if (tx1) tx1++; if (n<tx1) n=tx1;
             ty1-=ty0; sty=0; if (ty1>0) sty=+1; if (ty1<0) { sty=-1; ty1=-ty1; } if (ty1) ty1++; if (n<ty1) n=ty1;
             // single pixel (not a line)
             if (!n)
                 {
                 if ((vx0>=0)&&(vx0<scr.xs)) scr.pyx[vy][vx0]=txr.pyx[ty0][tx0];
                 continue;
                 }
             // ND DDA algo i is parameter
             for (cvx=ctx=cty=n,i=0;;)
                 {
                 while (tx0<0) tx0+=txr.xs;
                 while (ty0<0) ty0+=txr.ys;
                 while (tx0>=txr.xs) tx0-=txr.xs;
                 while (ty0>=txr.ys) ty0-=txr.ys;
                 if ((vx0>=0)&&(vx0<scr.xs)) scr.pyx[vy][vx0]=txr.pyx[ty0][tx0];
                 i++; if (i>=n) break;
                 cvx-=vx1; if (cvx<=0){ cvx+=n; vx0+=svx; }
                 ctx-=tx1; if (ctx<=0){ ctx+=n; tx0+=stx; }
                 cty-=ty1; if (cty<=0){ cty+=n; ty0+=sty; }
                 }
             }
         }
     void polygon2D(Texture2D &scr,Texture2D &txr,int *vx,int *vy,int *tx,int *ty,int n)
         {
         int i,j,y,Y0,Y1;
         // y range to render
         Y0=Y1=vy[0];
         for (i=1;i<n;i++)
             {
             if (Y0>vy[i]) Y0=vy[i];
             if (Y1<vy[i]) Y1=vy[i];
             }
         // clip to screen in y axis
         if ((Y1<0)||(Y0>=scr.ys)) return;
         if (Y0<      0) Y0=       0;
         if (Y1>=scr.ys) Y1=scr.ys-1;
         // clear buffers
         for (y=Y0;y<=Y1;y++)
             {
             bufl_vx[y]=-1;
             bufr_vx[y]=-1;
             }
         // render circumference
         for (j=n-1,i=0;i<n;j=i,i++)
          _fill2D_line(scr,txr,vx[i],vy[i],tx[i],ty[i],vx[j],vy[j],tx[j],ty[j]);
         // fill horizontal lines
         _fill2D(scr,txr,Y0,Y1);
         }
    

    And finally preview (using sky texture for both sky and ground):

    The render does not have perspective correct interpolation but for single big texture its not a big problem. In case you want also jumps then you need recompute R,r with the use of z position of player or use Another option to compute the texture coordinates by simply casting 4 rays (one for each corner of half screen rectangle) and check where it hit map edges.

    The math behind can be found in here:

    However beware your perspective must match the ray cast otherwise alignment artifacts may occur (or ground move with slightly different speed than walls).