1. 直接比较字符与Unicode范围(无需调用`ord()`):
def is_chinese(char):
    """Return True if *char* falls within U+4E00..U+9FFF.

    That range is the CJK Unified Ideographs block.  The test is an ordinary
    string comparison, so it is intended for a single character: an empty
    string yields False, and a longer string is compared lexicographically
    (i.e. it is judged essentially by its leading character).
    """
    return u'\u4e00' <= char <= u'\u9fff'
2. 使用`unicodedata`库检查字符名称:
import unicodedata
def is_chinese(char):
    """Return True if the Unicode name of *char* contains ``'CJK'``.

    *char* must be a single character; ``unicodedata.name`` raises
    ``TypeError`` for anything else.  Note that the substring test also
    matches non-ideograph names such as CJK strokes or radicals.
    """
    # Passing a default avoids the ValueError that unicodedata.name() raises
    # for characters that have no name (e.g. control characters like '\n').
    return 'CJK' in unicodedata.name(char, '')
3. 使用正则表达式匹配:
import re
def is_chinese(word):
    """Return True if *word* contains at least one character in U+4E00..U+9FFF.

    An empty string yields False.
    """
    # Upper bound is U+9FFF (not U+9FA5) so this agrees with the other range
    # checks in this file and does not miss ideographs in U+9FA6..U+9FFF.
    pattern = re.compile(u'[\u4e00-\u9fff]')
    return bool(pattern.search(word))
4. 借助GB2312或GBK字符集:
def is_chinese_gbk(word):
    """Return True if *word* contains a character that GBK encodes as a hanzi.

    Each character is encoded to GBK on its own.  Characters that GBK cannot
    represent (e.g. emoji) are skipped instead of raising
    ``UnicodeEncodeError``.  An empty string yields False.

    The previous implementation compared the GBK and UTF-8 byte lengths of
    the whole string for equality, which is true for pure-ASCII text and
    false for Chinese text -- the opposite of what the name promises.
    """
    for ch in word:
        try:
            encoded = ch.encode('gbk')
        except UnicodeEncodeError:
            # Not representable in GBK, so it cannot be a GBK hanzi.
            continue
        # ASCII encodes to a single byte.  Among two-byte codes, lead bytes
        # 0xA1-0xA9 are the GBK symbol rows (punctuation, Greek, Cyrillic,
        # kana, ...); the remaining lead bytes hold the ideograph areas.
        if len(encoded) == 2 and not 0xA1 <= encoded[0] <= 0xA9:
            return True
    return False
def str_count(s):
    """Return True if *s* contains at least one character in U+4E00..U+9FFF.

    Despite the name, the result is a boolean rather than a count.  An empty
    string yields False.
    """
    # The original kept tallies for several character classes (letters,
    # digits, spaces, punctuation) and left the "other characters" branches
    # as a placeholder, but only the Chinese tally decided the return value,
    # so a direct membership scan is equivalent.
    return any(u'\u4e00' <= c <= u'\u9fff' for c in s)
6. 使用`is_not_en_word`和`is_en_mail`函数:
def is_not_en_word(word):
    """Return True if any character of *word* lies in U+4E00..U+9FFF.

    Only that range is inspected: other non-English text (e.g. Cyrillic or
    kana) is not detected, and an empty string yields False.
    """
    return any(u'\u4e00' <= c <= u'\u9fff' for c in word)
def is_en_mail(mail_text):
    """Return True if more than 10% of *mail_text* is in U+4E00..U+9FFF.

    The ratio is computed per character over the whole text.  Empty text
    yields False instead of raising ``ZeroDivisionError``.

    NOTE(review): the name suggests "is an English mail", yet the result is
    True when the share of Chinese characters is high.  The polarity is left
    unchanged here; confirm the intended meaning against callers.
    """
    if not mail_text:
        # Nothing to measure; also avoids dividing by zero below.
        return False
    chinese_chars = sum(1 for c in mail_text if u'\u4e00' <= c <= u'\u9fff')
    return chinese_chars / len(mail_text) > 0.1
您可以根据需要选择合适的方法来检测字符串中是否包含中文字符。