Files
TomoATT/utils/find_2_bytes_chars.py
2025-12-17 10:53:43 +08:00

28 lines
1.1 KiB
Python

import os
import re
import sys
def find_kanji_chars_in_file(file_path):
    """Report CJK ideographs found in a text file.

    The file is decoded as UTF-8 with ``errors='ignore'``, so bytes that are
    not valid UTF-8 are dropped; text stored in another encoding may
    therefore go undetected.  Every character in the range U+4E00-U+9FFF is
    collected in file order, and if any are present a single summary line is
    printed to stdout.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The list of matching characters in order of appearance (duplicates
        kept); an empty list when the file contains none.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    kanji_pattern = re.compile(r'[\u4E00-\u9FFF]')
    with open(file_path, 'r', encoding='utf-8', errors='ignore') as file:
        content = file.read()
    matches = kanji_pattern.findall(content)
    if matches:
        print(f"Found Kanji characters in {file_path}: {''.join(matches)}")
    # Hand the hits back so callers can count them or fail a check,
    # instead of having to parse stdout.
    return matches
def scan_directory(directory):
    """Recursively check the source/text files under a directory for Kanji.

    Walks ``directory`` with ``os.walk`` and passes every file whose path
    ends with one of the recognised extensions to
    ``find_kanji_chars_in_file``; all other files are skipped.  If
    ``directory`` is not an existing directory, a warning is written to
    stderr and nothing is scanned.

    Args:
        directory: Root of the tree to scan.

    Returns:
        None.
    """
    # Extensions treated as text.  Built once as a tuple because
    # str.endswith accepts a tuple of alternatives.
    target_files = (
        '.cpp', '.h', '.hpp', '.c', '.cc', '.hh', '.cxx', '.hxx',
        '.py', '.txt', '.md', '.rst', '.ipynb', '.sh',
    )

    # os.walk yields nothing for a missing path, which would be
    # indistinguishable from a clean scan; report it explicitly instead.
    if not os.path.isdir(directory):
        print(f"Warning: '{directory}' is not a directory, skipping",
              file=sys.stderr)
        return

    for root, _, files in os.walk(directory):
        for file in files:
            file_path = os.path.join(root, file)
            # skip non-text files
            if not file_path.endswith(target_files):
                continue
            find_kanji_chars_in_file(file_path)
if __name__ == "__main__":
    # Directories to scan, given relative to the current working directory.
    # Edit this tuple to check a different location (e.g. '.').
    for target_dir in ('./src', './test', './include', './examples'):
        scan_directory(target_dir)