initial upload
This commit is contained in:
72
docs/edit_scripts/find_duplicate_files.py
Executable file
72
docs/edit_scripts/find_duplicate_files.py
Executable file
@@ -0,0 +1,72 @@
|
||||
# Search for unlinked .md files in all files
|
||||
# USAGE: python find_duplicate_files.py
|
||||
# for every file .md search all files for duplicates
|
||||
|
||||
# This is just checking .md files, consider other file types
|
||||
|
||||
# MUST SET top_dir
|
||||
# MUST SET file to open and write results
|
||||
|
||||
import os, sys, re
|
||||
|
||||
|
||||
|
||||
# routine walk through all the directories, search each file
|
||||
|
||||
def find_word_walk(top_dir,searchstring):
|
||||
|
||||
count = 0
|
||||
|
||||
for root, drs, fles in os.walk(top_dir):
|
||||
for fle in fles:
|
||||
|
||||
# searchstring is file name to look for
|
||||
# infile_name is the full path of searchstring
|
||||
# froot is the root name of file to look for (will find .md and .html)
|
||||
# fle is the file to check
|
||||
|
||||
if '.md' in fle:
|
||||
|
||||
infile_name = os.path.join(root, fle)
|
||||
|
||||
fstring = os.path.splitext(fle)
|
||||
froot = fstring[0]
|
||||
fhtml = froot+".html"
|
||||
|
||||
result = re.match(searchstring,fle)
|
||||
# print 'check name %s current file: %s' % (searchstring, fle)
|
||||
|
||||
if result :
|
||||
count = count + 1
|
||||
if count > 1 :
|
||||
print 'match %s with file %s' % (searchstring, infile_name)
|
||||
|
||||
|
||||
# print '%d duplicate files for %s: ' % (count, searchstring)
|
||||
return count
|
||||
|
||||
# main -----------------------------------------------
|
||||
|
||||
searchstring = "notset"
|
||||
top_dir = '/project/eesdev/tam/clone/LaGriT/docs/'
|
||||
|
||||
# write to file instead of stdout
|
||||
sys.stdout=open("find_duplicate_files.out.txt","w")
|
||||
|
||||
|
||||
# for each .md file find a link in another .md file
|
||||
for root, drs, fles in os.walk(top_dir):
|
||||
for fle in fles:
|
||||
|
||||
# if '.pdf' in fle:
|
||||
if '.md' in fle:
|
||||
|
||||
searchstring = fle
|
||||
# print 'Search files for %s: %s' % (searchstring, root)
|
||||
|
||||
itotal = find_word_walk(top_dir,searchstring)
|
||||
|
||||
if itotal > 1:
|
||||
print '%d duplicates with %s: %s' % (itotal, searchstring, root)
|
||||
|
||||
sys.stdout.close()
|
||||
Reference in New Issue
Block a user