37 lines
1020 B
Python
37 lines
1020 B
Python
'''
Reverse a Unicode string with proper handling of combining characters
'''
|
|
|
|
import unicodedata
|
|
|
|
def ureverse(ustring):
    '''
    Reverse a string while keeping combining characters with their base.

    The string is split into groups, each made of one base character
    followed by any characters whose Unicode name contains "COMBINING".
    The order of the groups is reversed; the order of the characters
    inside each group is preserved, so marks stay attached to the same
    base character.

    A combining character at the very start of the string has no base
    character to attach to, so it starts a group of its own instead of
    raising an error.

    NOTE: detection relies on the character name, so marks whose name
    does not contain "COMBINING" are treated as ordinary base characters.

    Args:
        ustring: the string to reverse.

    Returns:
        A new string with the groups in reverse order.

    Example:
        >>> ucode = ''.join(chr(int(n, 16))
        ...                 for n in ['61', '73', '20dd', '64', '66', '305'])
        >>> ucoderev = ureverse(ucode)
        >>> ['%x' % ord(char) for char in ucoderev]
        ['66', '305', '64', '73', '20dd', '61']
    '''
    groupedchars = []
    for char in ustring:
        # unicodedata.name() raises ValueError for unnamed code points
        # unless a default is given; '' simply never matches.
        is_combining = 'COMBINING' in unicodedata.name(char, '')
        if is_combining and groupedchars:
            # Attach the mark to the group of the preceding base character.
            groupedchars[-1] += char
        else:
            # A base character, or a leading mark with nothing to attach to.
            groupedchars.append(char)

    # Grouped reversal
    return ''.join(reversed(groupedchars))
|
|
|
|
if __name__ == '__main__':
    # Demo input: 'a', 's' + COMBINING ENCLOSING CIRCLE, 'd',
    # 'f' + COMBINING OVERLINE, built from hexadecimal code points.
    ucode = ''.join(map(chr, (int(hexcode, 16)
                              for hexcode in ['61', '73', '20dd',
                                              '64', '66', '305'])))
    ucoderev = ureverse(ucode)
    # Show the original followed by the reversed string.
    print(ucode)
    print(ucoderev)
|