initial upload
This commit is contained in:
28
utils/find_2_bytes_chars.py
Normal file
28
utils/find_2_bytes_chars.py
Normal file
@@ -0,0 +1,28 @@
|
||||
import os
|
||||
import re
|
||||
|
||||
def find_kanji_chars_in_file(file_path):
|
||||
kanji_pattern = re.compile(r'[\u4E00-\u9FFF]')
|
||||
with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
|
||||
content = file.read()
|
||||
matches = kanji_pattern.findall(content)
|
||||
if matches:
|
||||
print(f"Found Kanji characters in {file_path}: {''.join(matches)}")
|
||||
|
||||
def scan_directory(directory):
|
||||
for root, _, files in os.walk(directory):
|
||||
for file in files:
|
||||
file_path = os.path.join(root, file)
|
||||
# skip non-text files
|
||||
target_files = ['.cpp', '.h', '.hpp', '.c', '.cc', '.hh', '.cxx', '.hxx', '.py', '.txt', '.md', '.rst', '.ipynb', '.sh']
|
||||
if not file_path.endswith(tuple(target_files)):
|
||||
continue
|
||||
|
||||
find_kanji_chars_in_file(file_path)
|
||||
|
||||
if __name__ == "__main__":
|
||||
#directory_to_scan = '.' # Change this to the directory you want to scan
|
||||
list_target_dir = ['./src', './test', './include', './examples']
|
||||
|
||||
for directory_to_scan in list_target_dir:
|
||||
scan_directory(directory_to_scan)
|
||||
Reference in New Issue
Block a user