close Warning: AdminModule failed with TracError: Unable to instantiate component <class 'trac.admin.web_ui.PluginAdminPanel'> (super(type, obj): obj must be an instance or subtype of type)

HabitSystemV3: unitok_Ethiopian.py

File unitok_Ethiopian.py, 3.6 KB (added by xsuchom2, 8 years ago)
Line 
# coding=utf-8
"""Token-class regular expressions, with a WORD rule for Ethiopian languages.

Each token class is exposed twice: the pattern source (``NAME``) and the
compiled pattern (``NAME_RE``).  ``re_list`` collects the compiled patterns
as ``(name, pattern)`` pairs; the order of the list is part of the data
(presumably the consumer tries the patterns first to last -- the consumer
is not part of this module, so confirm against the caller).

The pattern literals use plain ``r"..."`` / ``u"..."`` prefixes because the
combined ``ur"..."`` prefix is a syntax error on Python 3.
"""

import re

SGML_TAG = r"""
    <!-- .*? -->                # XML/SGML comment
    |                           # -- OR --
    <[!?/]?(?!\d)\w[-\.:\w]*    # Start of tag/directive
    (                           # Attributes
        [^>'"]*                 # - attribute name (+whitespace +equal sign)
        ('[^']*'|"[^"]*")       # - attribute value
    )*
    \s*                         # Spaces at the end
    /?                          # Forward slash at the end of singleton tags
    \s*                         # More spaces at the end
    >                           # +End of tag/directive
"""
SGML_TAG_RE = re.compile(SGML_TAG, re.UNICODE | re.VERBOSE | re.DOTALL)

WHITESPACE = r"\s+"
WHITESPACE_RE = re.compile(WHITESPACE)

# Shared by URL and EMAIL; contains no whitespace or '#', so it can be
# spliced into a re.VERBOSE pattern unchanged.
DNS_HOST = r"(([-a-z0-9]+\.)+[a-z]{2,})"

# The user part carries a '+' quantifier: without it only one-character user
# names were accepted and "scheme://user:password@host" was not recognised.
# Every segment is a raw string so that '\.', '\w' and '\$' reach the regex
# engine untouched.
URL = r"""
    (
        # scheme://[user:password]
        (ftps?|https?|file)://([-a-z0-9_;?&=]+(:[-a-z0-9_;?&=]*)?@)?
        # or "www" without the scheme part
        |www\.
    )
    # DNS host / localhost / IP
    (""" + DNS_HOST + r"""
    | localhost |
    ([0-9]{1,3}\.){3}[0-9]{1,3})
    # Port specification (optional)
    (:[0-9]+)?
    # Scheme specific extension (optional)
    (/[-\w;/?:@=&\$_.+!*'(~#%,]*)?
"""
URL_RE = re.compile(URL, re.VERBOSE | re.IGNORECASE | re.UNICODE)

EMAIL = r"[-a-z0-9._']+@" + DNS_HOST
EMAIL_RE = re.compile(EMAIL, re.IGNORECASE)

# Numeric character references may use lower-case hex digits ("&#x3b1;");
# the pattern is compiled without IGNORECASE, so both cases are spelled out.
HTMLENTITY = r"&(#[xX]?[0-9A-Fa-f]+|\w+);"
HTMLENTITY_RE = re.compile(HTMLENTITY)

DOTCOM = r"""
    (?<!\w)
        ([-a-z0-9]+\.){1,2}(com|org)
    (?!\w)
"""
DOTCOM_RE = re.compile(DOTCOM, re.IGNORECASE | re.VERBOSE)

NUMBER = r"""
    (?<!\S)
        [-+]?
        (\d[\d,.]*\d | \d)
        ([eE][-+][0-9]+)?
    (?![-\w])
"""
NUMBER_RE = re.compile(NUMBER, re.UNICODE | re.VERBOSE)

ABBREVIATION = r"""
    (?<!\w)
        (?:
            #general
            co\.|etc\.|inc\.|ltd\.|dr\.|prof\.|jr\.
        )
"""
ABBREVIATION_RE = re.compile(ABBREVIATION, re.UNICODE | re.VERBOSE)

USA = r"""
    (?<!\w)
        ([A-Z]\.)+([A-Z](?!\w))?
"""
USA_RE = re.compile(USA, re.UNICODE | re.VERBOSE)

# Standard word: r"\w[\w-]*\w|\w"
# Special for Ethiopian languages: An apostrophe is a part of a word in case
# there is a letter on both sides of the apostrophe
# and there is no number on any side of the apostrophe.
WORD = r"(?:(?!\d)(?:[\w-]|(?<!\d)(?<=\w)'(?=\w)(?!\d)))+"
WORD_RE = re.compile(WORD, re.UNICODE)

MULTICHAR_PUNCTUATION = r"([?!]+|'')"
MULTICHAR_PUNCTUATION_RE = re.compile(MULTICHAR_PUNCTUATION)

# Members of the single-character punctuation class, kept as plain characters
# and escaped with re.escape() below.  Writing '[' and ']' as \u005b / \u005d
# inside a raw unicode literal turned them into bare brackets before the regex
# engine saw them, which closed the character class after "([{)".
_SINGLECHAR_PUNCTUATION_CHARS = (
    u"([{)]}"
    u"\u2985\u2989\u3008\u298d\u300c\u2991\u3010\u2995\u3014\u2018\u169b"
    u"\u201c\xab\u23b5\xbb\u0f3a\ufd3e\u29d9\u27e9\u276a\u276e\u2772\u29fd"
    u"\u2986\u300b\u298a\u300f\u298e\u2992\u3017\u3018\u301b\u169c\u301f"
    u"\u0f3d\u29da\u27e6\u2769\u27ea\u276d\u2771\u2775\u2983\u2987\u298b"
    u"\u300a\u298f\u300e\u2993\u2997\u3016\u301a\u301e\u203a\u0f3c\u2046"
    u"\u29db\u27e7\u2768\u27eb\u276c\u2770\u2774\u3019\u2984\u3009\u2988"
    u"\u2996\u300d\u298c\u3011\u2990\u3015\u2994\u2019\u2998\u201d\u301d"
    u"\u23b4\u2039\u0f3b\ufd3f\u2045\u29d8\u27e8\u276b\u276f\u2773\u29fc"
)
SINGLECHAR_PUNCTUATION = u"[" + re.escape(_SINGLECHAR_PUNCTUATION_CHARS) + u"]"
SINGLECHAR_PUNCTUATION_RE = re.compile(SINGLECHAR_PUNCTUATION, re.UNICODE)

# Catch-all: one character followed by any number of repetitions of itself.
# Compiled without DOTALL, so '.' does not match a newline.
ANY_SEQUENCE = r"(.)\1*"
ANY_SEQUENCE_RE = re.compile(ANY_SEQUENCE)

# (token class name, compiled pattern) pairs; the catch-all comes last.
re_list = [
    ('SGML_TAG', SGML_TAG_RE),
    ('WHITESPACE', WHITESPACE_RE),
    ('URL', URL_RE),
    ('EMAIL', EMAIL_RE),
    ('HTMLENTITY', HTMLENTITY_RE),
    ('DOTCOM', DOTCOM_RE),
    ('NUMBER', NUMBER_RE),
    ('ABBREVIATION', ABBREVIATION_RE),
    ('USA', USA_RE),
    ('WORD', WORD_RE),
    ('MULTICHAR_PUNCTUATION', MULTICHAR_PUNCTUATION_RE),
    ('SINGLECHAR_PUNCTUATION', SINGLECHAR_PUNCTUATION_RE),
    ('ANY_SEQUENCE', ANY_SEQUENCE_RE),
]