# Update redirected links in your website
# (c) 2017-2025 Scriptol.com
# Free under the MIT License.
# Requires the PHP 7 interpreter.

# Works on a local copy of your site.
# Searches for all links that return a 301 redirect
# and replaces them with the target URL.
# Also reports broken (404) links.
#
# You can update the online content of the site
# with PHP FTP Synchronizer.
#

# Scriptol-PHP Implementation of DOM official
# Fully compatible with PHP, requires Scriptol PHP
# The DOM interface is (c) 2007 Denis Sureau - Mozilla 1.1 Licence.
#
extern

// Declarations of the PHP DOM classes used by this program.
// Only signatures are given here; no implementation is provided in this block.

// Comment node, declared without members.
class DOMComment
/class

// Text node.
class DOMText
    text wholeText
	void DOMText(cstring = "")
	// NOTE(review): PHP's DOMText::splitText takes an integer offset; none is declared here - confirm.
	DOMText splitText()
/class

// Generic node of the document tree.
class DOMNode
	text nodeValue         // the content
	text nodeName
	text textContent
	DOMNode firstChild
	DOMNode lastChild
	DOMNode previousSibling
	DOMNode nextSibling
	
	boolean hasChildNodes()
	boolean hasAttributes()
	DOMNode appendChild(DOMNode)
	DOMNode removeChild(DOMNode)
	// NOTE(review): PHP's DOMNode::replaceChild takes two nodes (new, old); one is declared here - confirm.
	DOMNode replaceChild(DOMNode)
	DOMNode insertBefore(DOMNode, DOMNode)
/class

// List of nodes, as returned by getElementsByTagName.
class DOMNodeList
	DOMNode item(integer)
	int length             // the number of elements
/class

// Element node: a DOMNode with a tag name and attributes.
class DOMElement is DOMNode
	cstring textContent
	cstring tagName
    
	void DOMElement(cstring)
	boolean hasAttribute(cstring)
	cstring getAttribute(cstring)
	boolean setAttribute(cstring, cstring)
	void setIdAttribute(cstring, boolean)
	boolean removeAttribute(cstring)
	DOMNodeList getElementsByTagName(cstring)
/class


// Whole document: loading, saving, lookup and node creation.
class DOMDocument

  cstring encoding

  // Two optional string arguments; in PHP these are the version and the encoding, in that order.
  void DOMDocument(cstring = "", cstring="")
	boolean loadHTMLFile(cstring)       // Load a HTML file
	boolean loadHTML(cstring)		    // Create a HTML document from a string
	boolean load(cstring)		        // Load an XML file
	boolean loadXML(cstring)		    // Create a XML document from a string
	int saveHTMLFile(cstring = null)	// Save to a file the HTML document
	cstring saveHTML(cstring = null)	// Return a HTML document (save to string)
	int save(cstring = null)	        // Save to a file the XML document
	cstring saveXML(cstring = null)	    // Return an XML document (save to string)
	
	DOMElement getElementById(cstring)
	DOMNodeList getElementsByTagName(cstring)
	
	boolean validate()		            // Check using the DTD of the document
	
	DOMText createTextNode (cstring)
	DOMNode appendChild(DOMNode)
	DOMElement createElement(cstring)	// Create an orphan DOMElement, use appendChild
	DOMComment createComment(cstring)	// Create an orphan DOMComment

/class

/extern


#  Path Class
#  Scriptol - (c) 2001-2025  D.G Sureau
#  www.scriptol.com
#  Licence: OSS

#  This is a set of static functions related to files in directory
#  The path separator is "/" under Unix and Windows


class Path

    # EXISTS - Test if a file exists
    static boolean exists(text dname): return file_exists(dname)

    # SIZE - Return the size of a file, as given by filesize()
    static number size(text fname):   return filesize(fname)

    # TYPE - Return the type of an entry: file, dir, etc...
    static text type(text fname):   return filetype(fname)

    # DATE - Return the last modification time of a file (filemtime),
    # formatted as "Y-m-d H:i:s".
    # The format string used to be empty, which made date() return
    # an empty string for every file.

    static text created(text fname):
        int t = filemtime(fname)
    return date("Y-m-d H:i:s", t)

    # ISFILE - True when filetype() reports "file"
    static boolean isFile(text fname) return filetype(fname) = "file"

    # ISDIR - True when filetype() reports "dir"; a "link" is never a directory
    static boolean isDir(text fname)
        text t = filetype(fname)
        if t = "link"  return false
        if t != "dir"  return false
    return true

    # REN - Rename a file or directory.
    # Returns the result of rename(), so that a failure is reported to the
    # caller (the function used to return true unconditionally).

    static boolean ren(text oldname, text newname)
        boolean b = rename(oldname, newname)
    return b

    # DELETE - Remove a file, returns the result of unlink()
    static boolean erase(text fname) return unlink(fname)

    # MERGE - Merge elements of path
    # Exactly one "/" is kept at the junction: a duplicate is dropped,
    # a missing one is inserted. An empty part returns the other part as is.
    static text merge(text path, text filename)
        if path="" return filename
        if filename = ""  return path
        text plc = path[path.length()-1]      // last char of the path
        text ffc = filename[0]                // first char of the filename
        if (plc = "/") and (ffc = "/") return path + filename[1..]
        if (plc <> "/") and (ffc <> "/") let path + "/"
    return path + filename


    # MAKE DIR - Create a sub-directory, returns the result of mkdir()

    static boolean make(text name) return mkdir(name)

    # SPLIT EXT - Split the node and the extension of a filename or path
    # The string is scanned backward for the last dot.
    # Returns the path and an empty extension when there is no dot.
    # NOTE(review): the scan does not stop at a directory separator, so a dot
    # in a directory name is taken as the split point - confirm this is intended.
    static text, text splitExt(text path)
        int l = path.length()
        if l = 0 ? return "", ""
        for int x in l - 1 .. 0 step -1
            if path[x] = "." ? return path[--x], path[x + 1..]
        /for
    return path, ""

    # HAS EXTENSION - Test if the file has an extension or it is inside a list
    # the list is an array of extensions separated by a space (with or without dot)

    array nullarr = array()

    static boolean hasExtension(text path, array extlist = [])
        int pos = path.findLast(".")
        if pos = nil return false

        text longext = path[pos ..]
        text shortext = longext[1 ..]   // extension without dot

        if shortext = "" return false
        if extlist = nil return true    // no list provided, return true

        if shortext in extlist return true
        if longext in extlist return true
    return false


    # GET EXTENSION - Get extension of a filename or path
    # The leading dot is included; returns "" when there is no dot.

    static text getExtension(text path)
        int pos = path.findLast(".")
        if pos <> nil return path[pos ..]
    return ""


    # CHANGE EXTENSION - Replace current extension by given one
    # on filename or full path
    # The new extension may be given with or without its leading dot.

    static text changeExt(text path, text newext = "")
        int l = path.length()
        if l = 0 return newext
        int pos = path.findLast(".")
        if pos <> nil
            if newext[0] ="."
                path = path[ -- pos]
            else
                path = path[ ..pos]
            /if
        /if
    return path + newext

    # HAS DIR  - Return true if the path has a directory or drive

    static boolean hasDir(text path)
        int l = path.length()
        if l = 0 ? return false
        if l > 1
            if path[1] = ":" return true    // second char is ":", e.g. "C:"
        /if

        // Any slash or anti-slash in the string counts as a directory part
        if path.find("/") <> nil return true
        if path.find("\\") <> nil return true
    return false


    # SPLIT  - Split path to directory and file
    # Splits at the last "/" or "\"; returns an empty directory when
    # the path holds no separator.

    static text, text splitFile(text path)
        int l = path.length()
        if l = 0  return "",""
        for int x in l - 1 .. 0 step - 1
            if (path[x] = "/") or (path[x] = "\\")  return path[..x], path[x + 1..]
        /for
    return "", path


    # GET DIR Get current directory
    # Declared static like every other member of this class.

    static text getDir()  return getcwd()

    # COMPARE PATHS
    # Two paths are equal when they have the same length and the same
    # characters, "/" and "\" being considered equivalent.

    static boolean compare(text a, text b)
        int l = a.length()
        if l <> b.length() return false
        for int i in 0 -- l
            if (a[i] = "\\") or (a[i] = "/")
                if b[i] = "/"   continue
                if b[i] = "\\" continue
                return false
            /if
            if a[i] <> b[i] return false
        /for
    return true
/class




# Global state shared by the scanning functions below.

# Statistics displayed by main() at the end of the run.
int pcounter = 0     // Number of pages scanned
int dcounter = 0     // Number of directories parsed
int lcounter = 0     // Number of links redirected 
int mcounter = 0     // Number of pages modified
int bcounter = 0     // Number of broken links 

text content         // Content of a page
text website = ""    // Domain name for relatives paths
text root = ""       // Root of the local repository
# Length of root, assigned in main() and used by rebuildURL() to strip the local prefix
int rootlen 

# Command line switches, assigned in main()
# DISPONLY is set by -t: changes are displayed, files are not written
bool DISPONLY = false
# VERBOSE is set by -v: each link checked is displayed
bool VERBOSE = false
# RECONSTRUCT is set when a domain name is given: relative links are rebuilt and checked
bool RECONSTRUCT = false

# Add your extension if not in list
# NOTE(review): ".https" is not a usual page extension, ".shtml" was possibly intended - confirm.

array extensions = [".html", ".htm", ".asp", ".php", ".https"]   

# Display the command line help, then terminate the program with status 0.
# Called by main() when an unknown option is given.
void usage()
	print
	print "UnRedir - (c) 2017-2025 Scriptol.com"
	print "Usage:" 
	print "Go to the root of the local copy of the website and type:"
	print "  php unredir.php [option][domain-name]"
	print "Options:"
	print "  domain  To build a full URL from a relative path. Replaced only if redirected."
	print "  -t      Display changes to be done, does not modify the files."
	print "  -v      Verbose, display each link checked."
	exit(0)
return


# Utilities

# Tell whether a URL holds an explicit protocol, that is the "://" marker.
bool hasProtocol(text url)
return url.find("://") <> nil

# Build an absolute URL from a link found in a local page.
#   url     the link as written in the page
#   locdir  local directory of the page, starting with the root path
# Returns "" for an empty link, and the link unchanged when it starts
# with "../" (not supported).
#
# Changes from the former version:
# - a root-relative link ("/dir/page") now gets an explicit scheme, like
#   the directory-relative case, instead of a scheme-less "//domain/..." URL;
# - the parts are joined with Path.merge, so a page in a sub-directory no
#   longer produces a double slash after the domain;
# - a domain given with its own scheme ("https://example.com") is kept as is,
#   "http://" is only added when no scheme is present.

text rebuildURL(text url, text locdir)
    if url = "" return ""
    if url[..2] = "../" return url                  // not supported

    text base = website
    if not hasProtocol(base) ? base = "http://" + base

    // Root-relative link: attach it directly to the domain
    if url[0] = "/" return Path.merge(base, url)

    // Keep only the part of the local directory that follows the root
    if locdir.length() >= rootlen ? locdir = locdir[rootlen..]
    locdir = Path.merge(base, locdir)
return Path.merge(locdir, url)

# Count a broken link and report it on the console.
# Always returns an empty string.
text dispBroken(text url)
    bcounter + 1
    print " *** Broken:", url
return ""

# Check for redirect

# Query a URL with curl and tell whether it is permanently redirected.
# Returns:
#   "404"    when the URL, or the final target of the redirect, answers 404
#   ""       when the URL is shorter than 8 characters, is not answered
#            with a 301, or when the final target does not answer 200
#   the URL  of the final target otherwise
# The sections between ~~ are inline PHP working on the same variables.
# NOTE(review): certificate verification is disabled (CURLOPT_SSL_VERIFYPEER false)
# and the connection timeout is 300 seconds - confirm both are intended.
text redirected(text url)
    if VERBOSE ? print "Check", url 
    if url.length() < 8 return ""  
    
    int code        
    text newurl 
    var hcurl = curl_init()
    # First request: headers only (NOBODY), redirects not followed,
    # so the status code is the one of the URL itself.
~~
    curl_setopt($hcurl, CURLOPT_CONNECTTIMEOUT, 300);
    curl_setopt($hcurl, CURLOPT_RETURNTRANSFER, true); 
    curl_setopt($hcurl, CURLOPT_VERBOSE, false);
    curl_setopt($hcurl, CURLOPT_URL, $url);        
    curl_setopt($hcurl, CURLOPT_HEADER, true);
    curl_setopt($hcurl, CURLOPT_NOBODY, true);
    curl_setopt($hcurl, CURLOPT_FOLLOWLOCATION, false);
    curl_setopt($hcurl, CURLOPT_SSL_VERIFYPEER, false);
    $headers = curl_exec($hcurl);
    $code = curl_getinfo($hcurl, CURLINFO_HTTP_CODE);
~~    
    if code = 404 
        curl_close(hcurl)
        return "404"
    /if    
    if code <> 301      // no redirected
        curl_close(hcurl)
        return ""      
    /if    
    # Second request on the same handle, redirects followed this time:
    # gives the effective URL and the status code of the final target.
~~    
    curl_setopt($hcurl, CURLOPT_FOLLOWLOCATION, true);    
    $headers = curl_exec($hcurl);
    $newurl = curl_getinfo($hcurl, CURLINFO_EFFECTIVE_URL);    
    $code = curl_getinfo($hcurl, CURLINFO_HTTP_CODE);
~~    
    curl_close(hcurl)
    if code = 404 return "404"
    if code <> 200 return ""
return newurl

# Display page path

# dflag is true as long as the header of the current page has not been
# displayed; scanPage() sets it again for each page.
bool dflag = true

# Display the path of the page, underlined, once per page.
void dispPage(text fpath)
    if not dflag return
    dflag = false
    if not VERBOSE print "."
    print fpath
    print "-".dup(fpath.length())
return 


# Search and replace a relative path (reg exp to be avoided)

# Replace a link by its new target in the global page content, but only
# where the link is a whole quoted value: "link" or 'link'.
#   link       the link as written in the page
#   nlink      the URL replacing it
#   attribute  name of the attribute holding the link (not used)
# Returns the number of replacements done.
#
# The former version looked at the first raw occurrence of the link only.
# When that occurrence was part of a longer string (for example
# "old/index.html" before "index.html"), nothing was replaced at all.
# Each quoted form is now replaced directly.

int relativeReplace(text link, text nlink, text attribute)
    if link = "" return 0

    int dcount = 0
    int scount = 0
    text dq = '"'
    text sq = "'"

    content = str_replace(dq + link + dq, dq + nlink + dq, content, dcount)
    content = str_replace(sq + link + sq, sq + nlink + sq, content, scount)
return dcount + scount

# Scan page for URLs

# Check a link of the current page and replace it when it is redirected.
#   link       value of the attribute
#   locdir     local directory of the page
#   fpath      path of the page, displayed before the first message
#   attribute  name of the attribute holding the link
# Returns the number of replacements done in the global page content.
#
# Ignored: empty links, anchors ("#..."), links starting with a dot,
# and links without protocol when no domain was given on the command line.
# Also ignored now: mailto:, tel:, javascript: and data: links. They hold
# no "://" and were formerly rebuilt as pages of the site, then reported
# as broken. A protocol-relative link ("//host/path") is now checked
# as "http://host/path" rather than rebuilt as a path of the site.

int linkProcess(text link, text locdir, text fpath, text attribute)
    int rcount = 0
    text nlink

    if link = "" return 0
    if link[0] = "#" return 0
    if link[0] = "." return 0

    text chklink = link

    if not hasProtocol(link)
        // Links with a scheme that is not a web address
        text low = strtolower(link)
        if low[..6] = "mailto:" return 0
        if low[..3] = "tel:" return 0
        if low[..10] = "javascript:" return 0
        if low[..4] = "data:" return 0

        if not RECONSTRUCT return 0
        if link[..1] = "//"
            chklink = "http:" + link
        else
            chklink = rebuildURL(link, locdir)
        /if
    /if
    nlink = redirected(chklink)
    if nlink = nil return 0
    dispPage(fpath)
    if nlink = "404"
        dispBroken(link)
        return 0
    /if
    if link = chklink
        // NOTE(review): every occurrence of the URL is replaced, including
        // one that is the beginning of a longer URL - confirm this is acceptable.
        content = str_replace(link, nlink, content, rcount)         // absolute url
    else
        rcount = relativeReplace(link, nlink, attribute)
    /if

    if rcount = 0 return 0
    if not VERBOSE print link
    print " Redirected to:", nlink
return rcount

# Scan a page: check the href of each <a> tag and the src of each <img> tag,
# then save the page when at least one link was replaced.
#   fpath   path of the page
#   locdir  local directory of the page
# The page is loaded in the global variable content, which linkProcess()
# modifies. Updates pcounter, lcounter and mcounter.
#
# Changes from the former version:
# - the document is created with the version "1.0" and the encoding "UTF-8";
#   "UTF-8" alone was passed as the version argument;
# - a page that cannot be read or is empty is reported and skipped, since
#   loadHTML() raises an error on an empty string with PHP 8.

void scanPage(text fpath, text locdir)
    DOMDocument dom = DOMDocument("1.0", "UTF-8")
    DOMElement node = null
    int count = 0
    text link
    dflag = true

    if VERBOSE print "Page :", fpath

    content = file_get_contents(fpath)
    if content.length() = 0
        print " *** Unreadable or empty:", fpath
        return
    /if

    // The inline @ silences the warnings of loadHTML on the next statement
    ~~@~~
    dom.loadHTML(content)
    pcounter + 1
    for node in dom.getElementsByTagName('a')
        link = node.getAttribute("href")
        count + linkProcess(link, locdir, fpath, "href")
    /for
    for node in dom.getElementsByTagName('img')
        link = node.getAttribute("src")
        count + linkProcess(link, locdir, fpath, "src")
    /for

    if count > 0
        // Nothing is written with the -t option, "Not saved" is displayed then
        var putctr = false
        if not DISPONLY ? putctr = file_put_contents(fpath, content)
        if putctr <> false
            echo "Saved with ", count, " change", plural(count)
            print
        else
            print "Not saved"
        /if
        lcounter + count
        mcounter + 1
    else
        if not VERBOSE echo "."
    /if

return

# Scan directory and subdirectories

# Scan the pages of a directory, then its sub-directories recursively.
#   locdir  path of the directory
# Entries whose name starts with a dot are ignored. Only the files with
# an extension in the global list extensions are scanned.
# dcounter is incremented for each sub-directory scanned.
# pcounter is not incremented here: scanPage() counts each page itself,
# and incrementing it in both places counted every page twice.

void scanDirectory(text locdir)

    print
    print locdir
    print "-".dup(locdir.length())

    array dirlist = scandir(locdir)
    if dirlist.empty() return

    # Processing files

    for text name in dirlist
        if name[0] = "." continue
        text e = Path.getExtension(name)
        if not (e in extensions) continue
        name = Path.merge(locdir, name)
        if filetype(name) = "file"
            scanPage(name, locdir)
        /if
    /for

    # Processing subdirs

    for text name in dirlist
        if name[0] = '.' continue
        name = Path.merge(locdir, name)
        if filetype(name) = "dir"
            scanDirectory(name)
            dcounter + 1
        /if
    /for

return

# Main program

# Entry point: read the command line, scan the current directory
# and display the statistics.
#   -v      sets VERBOSE
#   -t      sets DISPONLY
#   other   any other parameter starting with "-" displays the usage and exits;
#           anything else is taken as the domain name and sets RECONSTRUCT
# Returns 0.
int main(int argc, array argv)
	# The first argument is skipped
	array x = argv[1 .. ]
	for text param in x
	    if param = "-v"
	        VERBOSE = true
	        continue
	    /if     
	    if param = "-t"
	        DISPONLY = true
	        continue
	    /if
	    if param[0] = "-"
		    print "Unknown command $param"
            usage()
        /if
	    # The domain name is stored with a trailing slash
	    website = param
	    if website[-1] <> "/" let website + "/"
	    RECONSTRUCT = true
	/for
	
    # The links are checked with curl, nothing can be done without it
    if not function_exists("curl_init")
        die("Curl extension must be enabled.")
    /if

    # The scan starts from the current directory, taken as root of the site
    root = getcwd()
    rootlen = root.length()
	scanDirectory(root)

	# Summary
	echo pcounter, " page", plural(pcounter), " scanned in ",  dcounter, " dir", plural(dcounter), ", "
	echo lcounter, " link", plural(lcounter), " redirected."
    if bcounter > 0 
        print
        echo bcounter, " broken link", plural(bcounter), "."
    /if    
	print
	echo mcounter," page", plural(mcounter), " updated."
	print

return 0

# Start the program with the command line arguments of the script
main($argc, $argv)
