Files
AsciiEmoji/ascii_emoticons_data.py
2021-05-19 21:22:41 +02:00

180 lines
5.5 KiB
Python

# -*- coding: utf-8 -*-
# This file is part of Ascii Emoji.
#
# Copyright (C) 2021 Arthur Bols <arthur@bols.dev>
#
# Ascii Emoji is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Ascii Emoji is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Ascii Emoji. If not, see <https://www.gnu.org/licenses/>.
import re
class AsciiEmoticonsData(dict):
    """Dictionary keyed by ASCII emoticon strings.

    Besides the normal ``dict`` behaviour it can build a regular
    expression that finds any of its keys in running text.
    """

    def get_regex(self):
        """Return a regex pattern (as a string) matching any stored key.

        When an emoticon is bordered by an alphanumeric character it is
        NOT matched, e.g. ``foo:)`` NO, ``foo :)`` YES, ``(BRB)`` NO,
        ``(:))`` YES.  Several emoticons side-by-side, like ``:P:P:P``,
        are still matched one by one.

        Empty-string keys are ignored: an empty alternative would match
        at every position and switch the border checks off.  If no
        usable key is left, a pattern that never matches is returned.

        The pattern contains no capturing groups.
        """
        # Longest keys first; this also keeps keys of equal length next
        # to each other, which the grouping below relies on.
        keys = sorted((key for key in self if key), key=len, reverse=True)
        if not keys:
            # An empty negative lookahead can never succeed.
            return r'(?!)'

        escaped_keys = []
        # One list of escaped keys per distinct key length.  Python only
        # accepts a lookbehind whose alternatives all have the same
        # width, so every length gets a lookbehind of its own.
        same_length_groups = []
        group_length = None
        for emoticon in keys:
            # escape regexp metachars
            escaped = re.escape(emoticon)
            escaped_keys.append(escaped)
            if len(emoticon) != group_length:
                same_length_groups.append([])
                group_length = len(emoticon)
            same_length_groups[-1].append(escaped)

        alternations = ['|'.join(group) for group in same_length_groups]

        # An emoticon must not follow a word character or a dot (the dot
        # keeps "8)" in "(2.8)" from matching), unless what precedes it
        # is itself an emoticon.
        preceded_by = '|'.join(
            [r'(?<![\w.])'] + ['(?<=' + alt + ')' for alt in alternations])
        # Likewise it must not be followed by a word character, unless
        # another emoticon follows.
        followed_by = '|'.join(
            [r'(?![\w])'] + ['(?=' + alt + ')' for alt in alternations])

        return '(?:{})(?:{})(?:{})'.format(
            preceded_by, '|'.join(escaped_keys), followed_by)
# Table of ASCII emoticons (first column) and their Unicode emoji
# counterparts (second column).
# Keep entries grouped by the second column, so matching icons are easy
# to spot.
ascii_emoticons_data = AsciiEmoticonsData([
    ('(y)', '\U0001F44d'),
    ('8)', '\U0001F60e'),
    ('8-)', '\U0001F60e'),
    ('8-D', '\U0001F60e'),
    ('B)', '\U0001F60e'),
    ('B-)', '\U0001F60e'),
    ('B-D', '\U0001F60e'),
    (':-b', '\U0001F61b'),
    (':-P', '\U0001F61b'),
    (':-p', '\U0001F61b'),
    (':-Þ', '\U0001F61b'),
    (':b', '\U0001F61b'),
    (':P', '\U0001F61b'),
    (':p', '\U0001F61b'),
    # NOTE(review): this key was an empty string; presumably it is the
    # no-nose twin of ':-Þ' -- confirm.
    (':Þ', '\U0001F61b'),
    ('=P', '\U0001F61b'),
    ('>:P', '\U0001F61c'),
    ('X-P', '\U0001F61c'),
    (':(', '\U0001F61e'),
    (':-(', '\U0001F61e'),
    (':-[', '\U0001F61e'),
    (':[', '\U0001F61e'),
    ('=(', '\U0001F61e'),
    ('>:[', '\U0001F61e'),
    (':-O', '\U0001F62e'),
    (':O', '\U0001F62e'),
    ('>:O', '\U0001F62e'),
    ('O_O', '\U0001F62e'),
    ('</3', '\U0001F494'),
    (":')", '\U0001F602'),
    (":'-)", '\U0001F602'),
    (':-D', '\U0001F603'),
    (':D', '\U0001F603'),
    ('=D', '\U0001F603'),
    ("':)", '\U0001F605'),
    ("':-)", '\U0001F605'),
    ("':-D", '\U0001F605'),
    ("':D", '\U0001F605'),
    ("'=)", '\U0001F605'),
    ("'=D", '\U0001F605'),
    ('>:)', '\U0001F606'),
    ('>:-)', '\U0001F606'),
    ('>;)', '\U0001F606'),
    ('>=)', '\U0001F606'),
    ('x-D', '\U0001F606'),
    ('X-D', '\U0001F606'),
    ('xD', '\U0001F606'),
    ('XD', '\U0001F606'),
    ('0:)', '\U0001F607'),
    ('0:-)', '\U0001F607'),
    ('0:-3', '\U0001F607'),
    ('0:3', '\U0001F607'),
    ('0;-)', '\U0001F607'),
    ('0;^)', '\U0001F607'),
    ('O:)', '\U0001F607'),
    ('O:-)', '\U0001F607'),
    ('O:-3', '\U0001F607'),
    ('O:3', '\U0001F607'),
    ('O;-)', '\U0001F607'),
    ('O=)', '\U0001F607'),
    ('*)', '\U0001F609'),
    ('*-)', '\U0001F609'),
    (';)', '\U0001F609'),
    (';-)', '\U0001F609'),
    (';-]', '\U0001F609'),
    (';]', '\U0001F609'),
    (';^)', '\U0001F609'),
    (';D', '\U0001F609'),
    ('-_-', '\U0001F611'),
    ('-__-', '\U0001F611'),
    ('-___-', '\U0001F611'),
    ("':(", '\U0001F613'),
    ("':-(", '\U0001F613'),
    ("'=(", '\U0001F613'),
    (':-.', '\U0001F615'),
    (':-/', '\U0001F615'),
    (':/', '\U0001F615'),
    (':\\', '\U0001F615'),
    (':L', '\U0001F615'),
    ('=/', '\U0001F615'),
    ('=\\', '\U0001F615'),
    ('=L', '\U0001F615'),
    ('>:/', '\U0001F615'),
    ('>:\\', '\U0001F615'),
    (':*', '\U0001F618'),
    (':-*', '\U0001F618'),
    (':^*', '\U0001F618'),
    ('=*', '\U0001F618'),
    (':@', '\U0001F620'),
    ('>:(', '\U0001F620'),
    ('>:-(', '\U0001F620'),
    (":'(", '\U0001F622'),
    (":'-(", '\U0001F622'),
    (';(', '\U0001F622'),
    (';-(', '\U0001F622'),
    ('>.<', '\U0001F623'),
    ('D:', '\U0001F628'),
    (':$', '\U0001F633'),
    ('=$', '\U0001F633'),
    ('#)', '\U0001F635'),
    ('#-)', '\U0001F635'),
    ('%)', '\U0001F635'),
    ('%-)', '\U0001F635'),
    ('X)', '\U0001F635'),
    ('X-)', '\U0001F635'),
    (':#', '\U0001F636'),
    (':-#', '\U0001F636'),
    (':-X', '\U0001F636'),
    (':X', '\U0001F636'),
    ('=#', '\U0001F636'),
    ('=X', '\U0001F636'),
    (':)', '\U0001F642'),
    (':-)', '\U0001F642'),
    (':]', '\U0001F642'),
    ('=)', '\U0001F642'),
    ('=]', '\U0001F642'),
    ('*\\0/*', '\U0001F646'),
    ('*\\O/*', '\U0001F646'),
    ('\\0/', '\U0001F646'),
    ('\\O/', '\U0001F646'),
    ('<3', '\U00002764\U0000FE0F'),
])