180 lines
5.5 KiB
Python
180 lines
5.5 KiB
Python
# -*- coding: utf-8 -*-

# This file is part of Ascii Emoji.
#
# Copyright (C) 2021 Arthur Bols <arthur@bols.dev>
#
# Ascii Emoji is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ascii Emoji is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ascii Emoji. If not, see <https://www.gnu.org/licenses/>.
|
|
|
|
import re
|
|
|
|
|
|
class AsciiEmoticonsData(dict):
    """Dictionary keyed by ASCII emoticon text (e.g. ``':)'``).

    On top of the plain ``dict`` behaviour it can build a regular
    expression that finds any of its keys in free text.
    """

    def get_regex(self):
        """Return a regex pattern string that matches any emoticon key.

        When an emoticon is bordered by an alphanumeric character it is
        NOT matched, e.g. ``foo:)`` no, ``foo :)`` yes, ``(BRB)`` no,
        ``(:))`` yes.  Several emoticons side by side, like ``:P:P:P``,
        are all matched.

        Returns:
            str: the uncompiled pattern.  It contains no capturing groups.
            If the mapping holds no non-empty key, the never-matching
            pattern ``(?!)`` is returned, because an empty alternation
            would otherwise match the empty string throughout the text.
        """
        # Longest first: the alternation tries longer emoticons before
        # shorter ones, and keys of equal length end up next to each other.
        # Empty keys are skipped; they would make the pattern match ''.
        keys = [key for key in sorted(self.keys(), key=len, reverse=True)
                if key]
        if not keys:
            return r'(?!)'

        # Python's ``re`` only accepts fixed-width look-behind, so the
        # "another emoticon is adjacent" checks are emitted once per key
        # length.  Keys are escaped so regex metacharacters stay literal.
        by_length = {}
        for key in keys:
            by_length.setdefault(len(key), []).append(re.escape(key))
        groups = ['|'.join(escaped) for escaped in by_length.values()]

        any_emoticon = '|'.join(groups)
        emoticon_before = ''.join('|(?<=' + group + ')' for group in groups)
        emoticon_after = ''.join('|(?=' + group + ')' for group in groups)

        # A match must not touch a word character, unless the neighbour is
        # itself an emoticon.  The leading check uses [\w.] (alphanumeric
        # and dot) so that "8)" is not matched inside "(2.8)".
        return (r'(?:(?<![\w.])' + emoticon_before + ')'
                + '(?:' + any_emoticon + ')'
                + r'(?:(?![\w])' + emoticon_after + ')')
|
|
|
|
|
|
# Each row pairs one replacement emoji with every ASCII emoticon that maps
# to it, so emoticons sharing an icon are easy to spot.  The rows are
# flattened into (emoticon, emoji) pairs in exactly the order written here.
ascii_emoticons_data = AsciiEmoticonsData(
    (emoticon, emoji)
    for emoji, emoticons in (
        ('\U0001F44d', ['(y)']),
        ('\U0001F60e', ['8)', '8-)', '8-D', 'B)', 'B-)', 'B-D']),
        ('\U0001F61b', [':-b', ':-P', ':-p', ':-Þ', ':b', ':P', ':p', ':Þ',
                        '=P']),
        ('\U0001F61c', ['>:P', 'X-P']),
        ('\U0001F61e', [':(', ':-(', ':-[', ':[', '=(', '>:[']),
        ('\U0001F62e', [':-O', ':O', '>:O', 'O_O']),
        ('\U0001F494', ['</3']),
        ('\U0001F602', [":')", ":'-)"]),
        ('\U0001F603', [':-D', ':D', '=D']),
        ('\U0001F605', ["':)", "':-)", "':-D", "':D", "'=)", "'=D"]),
        ('\U0001F606', ['>:)', '>:-)', '>;)', '>=)', 'x-D', 'X-D', 'xD',
                        'XD']),
        ('\U0001F607', ['0:)', '0:-)', '0:-3', '0:3', '0;-)', '0;^)',
                        'O:)', 'O:-)', 'O:-3', 'O:3', 'O;-)', 'O=)']),
        ('\U0001F609', ['*)', '*-)', ';)', ';-)', ';-]', ';]', ';^)', ';D']),
        ('\U0001F611', ['-_-', '-__-', '-___-']),
        ('\U0001F613', ["':(", "':-(", "'=("]),
        ('\U0001F615', [':-.', ':-/', ':/', ':\\', ':L', '=/', '=\\', '=L',
                        '>:/', '>:\\']),
        ('\U0001F618', [':*', ':-*', ':^*', '=*']),
        ('\U0001F620', [':@', '>:(', '>:-(']),
        ('\U0001F622', [":'(", ":'-(", ';(', ';-(']),
        ('\U0001F623', ['>.<']),
        ('\U0001F628', ['D:']),
        ('\U0001F633', [':$', '=$']),
        ('\U0001F635', ['#)', '#-)', '%)', '%-)', 'X)', 'X-)']),
        ('\U0001F636', [':#', ':-#', ':-X', ':X', '=#', '=X']),
        ('\U0001F642', [':)', ':-)', ':]', '=)', '=]']),
        ('\U0001F646', ['*\\0/*', '*\\O/*', '\\0/', '\\O/']),
        ('\U00002764\U0000FE0F', ['<3']),
    )
    for emoticon in emoticons
)
|