Files
LaGriT/docs/edit_scripts/find_unlinked_files.py

76 lines
2.2 KiB
Python
Raw Permalink Normal View History

2025-12-17 11:00:57 +08:00
# Search for unlinked .md files in all files
# USAGE: python find_unlinked_files.py
# for every file .md search all files for use of that .md or .html
# If none found, report unlinked file
# TODO:
# This will not catch duplicate file.md if file.md is used somewhere
# This is just checking .md in .md files, consider other file types
# MUST SET top_dir
# MUST SET file to open and write results
import os, sys
# routine walk through all the directories, search each file
# report .md files not found in any .md file
def find_word_walk(top_dir,searchstring):
count = 0
for root, drs, fles in os.walk(top_dir):
for fle in fles:
# searchstring is file to look for
# infile_name is the full path of searchstring
# froot is the root name of file to look for (will find .md and .html)
# infile is the file to look into
if '.md' in fle:
infile_name = os.path.join(root, fle)
with open(infile_name, 'r') as infile:
fstring = os.path.splitext(fle)
froot = fstring[0]
fhtml = froot+".html"
if searchstring in infile.read():
print 'found %s in file %s' % (searchstring, infile_name)
count = count + 1
if fhtml in infile.read():
print 'found %s in file %s' % (fhtml, infile_name)
count = count + 1
infile.close()
print '%d found files with %s: ' % (count, searchstring)
return count
# main -----------------------------------------------
searchstring = "notset"
top_dir = '/project/eesdev/tam/clone/LaGriT/docs/'
# write to file instead of stdout
sys.stdout=open("find_unlinked_files.out.txt","w")
# for each .md file find a link in another .md file
for root, drs, fles in os.walk(top_dir):
for fle in fles:
# if '.pdf' in fle:
if '.md' in fle:
searchstring = fle
print 'Search files for %s: %s' % (searchstring, root)
itotal = find_word_walk(top_dir,searchstring)
print '%d Total files with %s: %s' % (itotal, searchstring, root)
sys.stdout.close()