U
    H©I^º=  ã                   @   sò  d dl Z d dlZd dlZd dlZd dlmZ ej e j e j 	e
¡¡¡ d dlZdZdd„ ZG dd„ deƒZedd	Zejd
dddddd ejddddddd ejddddddd ejddddddd ejddd d!d"d#d ejd$d%d&d'd(d)d ejd*d+d&d,d(d-d e ¡ \ZZejr†ejsFe ¡  ed.ƒ e d/¡ e j ej¡rfe j ej¡s†e ¡  ed0ej ƒ e d/¡ ejs´ejr´e j ej¡ d1¡d  d2 e_ejsÖe ¡  ed3ƒ e d/¡ d4d5„ Ze d6krîeƒ  dS )7é    N)ÚOptionParserz 	
"$&<>,+=#!()'|{}[]?~`;%\c                 C   s   t dd„ | D ƒƒ S )a  
    Keynames must be ascii, and must not contain any invalid characters

    >>> gconf_valid_keyname('nyannyan')
    True

    >>> gconf_valid_keyname('nyan nyan')
    False

    >>> gconf_valid_keyname('nyannyan[')
    False

    >>> gconf_valid_keyname('nyan	nyan')
    False
    c                 s   s"   | ]}|t kpt|ƒd kV  qdS )é   N)Ú_INVALID_KEYNAME_CHARSÚord)Ú.0Úchar© r   ú+/usr/share/ibus-table/engine/tabcreatedb.pyÚ	<genexpr>4   s   ÿz&gconf_valid_keyname.<locals>.<genexpr>)Úany)Zkeynamer   r   r	   Úgconf_valid_keyname$   s    ÿr   c                       s(   e Zd ZdZ‡ fdd„Zdd„ Z‡  ZS )ÚInvalidTableNamez4
    Raised when an invalid table name is given
    c                    s   t t| ƒ ¡  || _d S ©N)Úsuperr   Ú__init__Ú
table_name)ÚselfÚname©Ú	__class__r   r	   r   ;   s    zInvalidTableName.__init__c                 C   s   d| j  dt  d S )NzValue of NAME attribute (%s) zcannot contain any of %r zand must be all ascii)r   r   )r   r   r   r	   Ú__str__?   s
    ÿþzInvalidTableName.__str__)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   Ú__classcell__r   r   r   r	   r   7   s   r   zusage: %prog [options])Zusagez-nz--nameZstorer   Ú a  specifies the file name for the binary database for the IME. The default is "%default". If the file name of the database is not specified, the file name of the source file before the first "." will be appended with ".db" and that will be used as the file name of the database.)ÚactionÚdestÚdefaultÚhelpz-sz--sourceÚsourcezSspecifies the file which contains the source of the IME. The default is "%default".z-ez--extraÚextrazSspecifies the file name for the extra words for the IME. The default is "%default".z-pz--pinyinÚpinyinz//usr/share/ibus-table/data/pinyin_table.txt.bz2zEspecifies the source file for the  pinyin. The default is "%default".z-oz--no-create-indexZstore_falseÚindexTzoDo not create an index for a database (Only for distrubution purposes, a normal user should not use this flag!)z-iz--create-index-onlyÚ
store_trueÚ
only_indexFz¡Only create an index for an existing database. Specifying the file name of the binary database with the -n or --name option is required when this option is used.z-dz--debugÚdebugzPrint extra debug messages.zM
Please specify the file name of the database you want to create an index on!é   z'
The database file '%s' does not exist.Ú.z.dbzC
You need to specify the file which contains the source of the IME!c                     s(  dd„ } t js,zt t j¡ W n   Y nX | dƒ tjt jd dd‰ dd„ }dd	„ }d
d„ }dd„ }dd„ }dd„ }dd„ }‡ fdd„}dd„ }	t jrÎ| dƒ | dƒ ˆ  ¡  | dƒ ˆ  d¡ | dƒ dS | dt j	 ƒ t
 d¡}
|
 t j	¡}|rt t j	d ¡ ¡ }ntt j	d d!d" ¡ }| d#d$¡}| d$¡}| d%ƒ ||ƒ\}}}| d&ƒ t||ƒƒ}| |	|ƒ¡ | d'ƒ ˆ  |¡ ˆ  d¡ | d(ƒ ||ƒ}| d)ƒ ˆ  |¡ ˆ j d*¡ ¡ d+krä| d,ƒ ||ƒ}| d-ƒ ˆ  |¡ ˆ j d.¡ ¡ d+krh| d/t j ƒ |
 t j¡}|r*t t jd ¡}ntt jd ƒ}| d0ƒ ||ƒ}| d1ƒ ||ƒ}| d2ƒ ˆ  |¡ | dƒ ˆ  ¡  ˆ j d*¡ ¡ d+krît jrî| d3ƒ ˆ  d¡ | d4t j ƒ |
 t j¡}|rØt t jd ¡}ntt jd ƒ}| d5ƒ ||ƒ}| d6ƒ ||ƒ}| d7t|ƒ ƒ i }|D ]$}|  d8|d |d9 f |i¡ q| d:t|ƒ ƒ i }|D ]$}|  d8|d |d9 f |i¡ qZ| d;t|ƒ ƒ |D ]}||kr”| !|¡ q”| d<t|ƒ ƒ t| "¡ ƒ}| d=ƒ ˆ  |¡ | dƒ ˆ  ¡  t j#r
| dƒ ˆ  d¡ n| d>ƒ ˆ  $d¡ | dƒ d S )?Nc                 S   s   t jrt| ƒ d S r   )Ú_OPTIONSr'   Úprint)Úmessager   r   r	   Údebug_print¢   s    zmain.<locals>.debug_printzProcessing DatabaseT)ÚfilenameZuser_dbZcreate_databasec                 S   sX  g }g }g }t  d¡}t  d¡}t  d¡}t  d¡}t  d¡}| D ]P}	| |	¡sB| |	¡sB||f||f||ffD ] \}
}|
 |	¡rp| |	¡  qBqpqB|sNi }|D ]€}	| |	¡}|r¢t| d¡ƒdkr¢| d¡|krt| d¡ƒt|| d¡ ƒkr"| d¡|| d¡< q¢| d¡|| d¡< q¢|D ]}| d||| f ¡ q(| ¡  |||fS )	Nú^###.*ú^[ \t]*$z[^\t]*=[^\t]*z$([^\t]+)\t([^\t]+)\t([0-9]+)(\t.*)?$z *[^\s]+ *\t *[^\s]+ *$r(   é   ú%s	%s)ÚreÚcompileÚmatchÚappendÚlenÚgroupÚsort)ÚfZ_attriZ_tableZ_gouciÚpatt_comÚ
patt_blankZ	patt_confZ
patt_tableZ
patt_gouciÚlineZ_pattZ_listZ
gouci_dictÚresÚkeyr   r   r	   Úparse_source±   s<    




ý


"zmain.<locals>.parse_sourcec           
   
   S   s¸   g }t  d¡}t  d¡}t  d¡}t  d¡}| D ]z}t|ƒtdƒkrN| d¡}| |¡s0| |¡s0| |¡}|r0| | d¡¡}|D ]$}	| d| d	¡|	| d
¡f ¡ q„q0|d d … S )Nz^#.*r0   z(.*)\t(.*)\t(.*)z[a-z]+[1-5]r   úutf-8r(   z%s	%s	%sr1   é   )r3   r4   ÚtypeÚdecoder5   Úfindallr8   r6   )
r:   Z_pinyinsr;   r<   Zpatt_pyZpatt_yinr=   r>   ZyinsZyinr   r   r	   Úparse_pinyinè   s"    





ÿ
zmain.<locals>.parse_pinyinc                 S   sr   g }t  d¡}t  d¡}t  d¡}| D ]F}t|ƒtdƒkrD| d¡}| |¡s&| |¡s&| |¡r&| |¡ q&|S )Nr/   r0   z
(.*)\t(.*)r   rA   )r3   r4   rC   rD   r5   r6   )r:   Z_extrar;   r<   Z
patt_extrar=   r   r   r	   Úparse_extraû   s    




zmain.<locals>.parse_extrac                 s   sF   | D ]<}t |ƒt dƒkr"| d¡}| ¡  ¡ \}}}|||fV  qd S ©Nr   rA   ©rC   rD   ÚstripÚsplit)r:   Zpinyin_lineZ_ziZ_pinyinZ_freqr   r   r	   Úpinyin_parser
  s
    
zmain.<locals>.pinyin_parserc                 S   sf   g }| D ]X}t |ƒt dƒkr&| d¡}| d¡d d… \}}}|dkrJd}| ||t|ƒdf¡ q|S )Nr   rA   ú	rB   ZNOSYMBOLr   )rC   rD   rK   r6   Úint)r:   Zphrase_listr=   ZxingmaÚphraseÚfreqr   r   r	   Úphrase_parser  s    
zmain.<locals>.phrase_parserc                 s   sB   | D ]8}t |ƒt dƒkr"| d¡}| ¡  ¡ \}}||fV  qd S rH   rI   )r:   r=   ZziZgcmr   r   r	   Úgoucima_parser  s
    
zmain.<locals>.goucima_parserc                 s   s|   | D ]r}t |ƒt dƒkr"| d¡}z| ¡  d¡\}}W n   | ¡  d¡\}}Y nX | ¡  ¡ }| ¡ }||fV  qd S )Nr   rA   ú=z==)rC   rD   rJ   rK   Úlower)r:   r=   ÚattrÚvalr   r   r	   Úattribute_parser#  s    
zmain.<locals>.attribute_parserc                    sj   g }| D ]\}t |ƒt dƒkr&| d¡}| ¡  ¡ \}}ˆ  |¡}|rX| |||df¡ qtd| ƒ q|S )Nr   rA   r   u+   No tabkeys found for â€œ%sâ€, not adding.
)rC   rD   rJ   rK   Zparse_phraser6   r+   )r:   Z
extra_listr=   rO   rP   Z_tabkey©Zdbr   r	   Úextra_parser/  s    

zmain.<locals>.extra_parserc                 S   s„   i }d}| D ]j}t |ƒt dƒkr*| d¡}t d|¡r<d}q|sBqt d|¡rR qxt d|¡}|r| d¡|| d	¡< qd
t|ƒfS )uê   
        Returns something like

        ("char_prompts", "{'a': 'æ—¥', 'b': 'æ—¥', 'c': 'é‡‘', ...}")

        i.e. the attribute name "char_prompts" and as its value
        the string representation of a Python dictionary.
        Fr   rA   z^BEGIN_CHAR_PROMPTS_DEFINITIONTz^END_CHAR_PROMPTS_DEFINITIONz(^(?P<char>[^\s]+)[\s]+(?P<prompt>[^\s]+)Úpromptr   Úchar_prompts)rC   rD   r3   r5   Úsearchr8   Úrepr)r:   r[   Ústartr=   r5   r   r   r	   Úget_char_prompts<  s&    	
 ÿzmain.<locals>.get_char_promptszOnly create IndexeszOptimizing database zCreate Indexes ÚmainzDone! :Dr   z	Load sources "%s"z.*\.bz2ÚrzUTF-8)ÚmodeÚencodingz
Ú
z	Parsing table source file z	  get attribute of IME :)z	  add attributes into DB z	  get phrases of IME :)z	  add phrases into DB Zuser_can_define_phraseÚtruez	  get goucima of IME :)z	  add goucima into DB Zpinyin_modez	Load pinyin source "%s"z	Parsing pinyin source file z	Preapring pinyin entriesz	  add pinyin into DB z!	Preparing for adding extra wordsz	Load extra words source "%s"z!	Parsing extra words source file z	Preparing extra words linesz'	  we have %d extra phrases from sourcer2   r1   z!	  the len of orig_phrases is: %dz"	  the len of extra_phrases is: %dz!	  %d extra phrases will be addedz	Adding extra words into DB zkWe don't create an index on the database, you should only activate this function for distribution purposes.)%r*   r&   ÚosÚunlinkr   ÚtabsqlitedbZTabSqliteDbZoptimize_databaseZcreate_indexesr!   r3   r4   r5   Úbz2ZBZ2FileÚreadÚopenÚreplacerK   Úlistr6   Z
update_imeZcreate_tablesZadd_phrasesZime_propertiesÚgetrT   Zadd_goucimar#   Z
add_pinyinr"   r7   ÚupdateÚpopÚvaluesr$   Zdrop_indexes)r-   r@   rF   rG   rL   rQ   rR   rW   rY   r_   Zpatt_sZ_bz2sr!   ZattriÚtableZgouciZ
attributesZphrasesZgoucimaZ_bz2pZpinyin_sZpyliner#   Zextra_sZ	extralineZ
extrawordsZorig_phrasesrO   Zextra_phrasesZ	extrawordZnew_phrasesr   rX   r	   r`   ¡   sÞ    þ7







ÿ
ÿ"ÿ

ÿ
r`   Ú__main__)!rf   Úsysri   r3   Zoptparser   Úpathr6   ÚdirnameÚabspathÚ__file__rh   r   r   Ú	Exceptionr   Z_OPTION_PARSERZ
add_optionÚ
parse_argsr*   Z_ARGSr&   r   Z
print_helpr+   ÚexitÚexistsÚisfiler!   ÚbasenamerK   r`   r   r   r   r   r	   Ú<module>   s¶   
 ú ú	 ú	 ú	 ú
 ú ûÿ
 
ÿ
  '
