U
    HI^                     @   s   d Z ddlZddlmZ ddlZddlZddlZddlZddlZddl	Z	ddl
Z
ejdk rlee ed edadZdZG dd dZG d	d
 d
ZdS )z7
Module for ibus-table to access the sqlite3 databases
    N)   r   r   utf-81.00u!  “”‘’《》〈〉〔〕「」『』【】〖〗（）［］｛｝．。，、；：？！…—·ˉˇ¨々～‖∶＂＇｀｜⒈⒉⒊⒋⒌⒍⒎⒏⒐⒑⒒⒓⒔⒕⒖⒗⒘⒙⒚⒛АБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯЁⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ⒈⒉⒊⒋⒌⒍⒎⒏⒐⒑⒒⒓⒔⒕⒖⒗⒘⒙⒚⒛㎎㎏㎜㎝㎞㎡㏄㏎㏑㏒㏕ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇€＄￠￡￥¤→↑←↓↖↗↘↙ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶーヽヾぁあぃいぅうぇえぉおかがきぎぱくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをん゛゜ゝゞ勹灬冫艹屮辶刂匚阝廾丨虍彐卩钅冂冖宀疒肀丿攵凵犭亻彡饣礻扌氵纟亠囗忄讠衤廴尢夂丶āáǎàōóǒòêēéěèīíǐìǖǘǚǜüūúǔù＋－＜＝＞±×÷∈∏∑∕√∝∞∟∠∣∥∧∨∩∪∫∮∴∵∶∷∽≈≌≒≠≡≤≥≦≧≮≯⊕⊙⊥⊿℃°‰♂♀§№☆★○●◎◇◆□■△▲※〓＃＆＠＼＾＿￣абвгдежзийклмнопрстуфхцчшщъыьэюяёⅰⅱⅲⅳⅴⅵⅶⅷⅸⅹβγδεζηαικλμνξοπρστυφθψω①②③④⑤⑥⑦⑧⑨⑩①②③④⑤⑥⑦⑧⑨⑩㈠㈡㈢㈣㈤㈥㈦㈧㈨㈩㈠㈡㈢㈣㈤㈥㈦㈧㈨㈩ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄘㄙㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦc                   @   s*   e Zd ZdZd	ddZdd Zdd ZdS )
ImePropertiesz=
    A class to cache the properties of an input method.
    Nc                 C   sn   |dkri }|sdS || _ d}z|| }W n   ddl}|  Y nX |D ]}|d | j |d < qRdS )ul   
        “db” is the handle of the sqlite3 database file obtained by
        sqlite3.connect().
        NzSELECT attr, val FROM main.ime;r      )ime_property_cacheexecutefetchall	traceback	print_exc)selfdbdefault_propertiessqlstrresultsr
   result r   +/usr/share/ibus-table/engine/tabsqlitedb.py__init__P   s    zImeProperties.__init__c                 C   s   || j kr| j | S dS )z
        Return the value for a key from the property cache

        :param key: The key to lookup in the property cache
        :type key: String
        :rtype: String
        N)r   )r   keyr   r   r   getc   s    

zImeProperties.getc                 C   s   dt | j S )Nzime_property_cache = %s)reprr   r   r   r   r   __str__o   s    zImeProperties.__str__)NN)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   L   s   
r   c                   @   sv  e Zd ZdZd^ddZd_d
dZdd Zdd Zd`ddZdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zdad-d.Zdbd/d0Zd1d2 Zdcd3d4Zd5d6 Zd7d8 Zddd9d:Zd;d< Zded?d@ZdfdAdBZdgdCdDZ dEdF Z!dGdH Z"dIdJ Z#dKdL Z$dMdN Z%dOdP Z&dhdQdRZ'didSdTZ(djdUdVZ)dWdX Z*dkdYdZZ+dld\d]Z,dS )mTabSqliteDbu  Phrase database for tables

    The phrases table in the database has columns with the names:

    “id”, “tabkeys”, “phrase”, “freq”, “user_freq”

    There are 2 databases, sysdb, userdb.

    sysdb: System database for the input method, for example something
           like /usr/share/ibus-table/tables/wubi-jidian86.db
           “user_freq” is always 0 in a system database.  “freq”
           is some number in a system database indicating a frequency
           of use of that phrase relative to the other phrases in that
           database.

    user_db: Database on disk where the phrases used or defined by the
           user are stored. “user_freq” is a counter which counts how
           many times that combination of “tabkeys” and “phrase” has
           been used. “freq” is equal to 0 for all combinations of
           “tabkeys” and “phrase” where an entry for that phrase is
           already in the system database which starts with the same
           “tabkeys”.
           For combinations of “tabkeys” and “phrase” which do not exist
           at all in the system database, “freq” is equal to -1 to
           indidated that this is a user defined phrase.
    NFc              #   C   s  zt tdaW n  ttfk
r2   t daY nX g | _|| _|| _| 	  |s`tj
| jrpt| j| _ntd| j  zX| jd | jd | jd | jd | jd | jd	 | jd
 W n$   dd l}|  td Y nX | jd ddddddt  dtd dddddddddddddddddddddddddddd"| _|rd }d!}t| jD ]8}|| j| d"}| j|| s~| j|| q~t| j| jd#| _t | jd$| _| jd%| _|   | _!| " | _#| jd&| _$| j$r*t%| j$& dk| _$ntd' d(| _$| jd)| _'| j'rdt%| j'& dk| _'ntd* d(| _'| ( | _)| * | _+| , | _-dd l.}	t
/|	0 d+}
tj
1| j2d,d-}t
/|
|| _3| 4  |r|rd S |d.krt
5|
stj
/td/d0}t
5|rt6tj
/|d1tj7rHt8tj
/|d1 t6tj
/|d2tj7rvt8tj
/|d2 t9:||
 t9;| t<|
| ntj=|
d3d4 t
/|
|}t
>|st?j@Ad5d6|i  nz| B|}d7d8d9d:d;g}|d ks |d< tCks | D|tE|krt?j@Ad=|  |d krXt?j@Ad> | jF|d?d@| _n|d< tCkrt?j@AdAdBtC  dC|d<    | jF||d< d@| _nL| D|tE|krt?j@AdDdEtE|  dF| D|  dG dH dI  d | _ddJlm} |dK}t?j@AdL| |  tj
>|r.tG|||  tj
>|dM rXtG|dM |dM |  tj
>|dN rtG|dN |dN |  t?j@AdO|  | H| t?j@AdP nt?j@AdQ|  W n   dd l}|  Y nX z(t?j@AdRdS|i  | jdT|  W n   t?j@AdU|  ddJlm} |dK}t?j@AdL| |  tj
>|r`tG|||  tj
>|dM rtG|dM |dM |  tj
>|dN rtG|dN |dN |  t?j@AdV|  | H| | jdT|  Y nX | IdW | jrvg }| jD ]*}|J|d |dX |dY |dZ d[ q d\}z| jK|| W n   dd l}|  Y nX | jL  | jd] | MdW | N  d S )^NZIBUS_TABLE_DEBUG_LEVELr   zCannot open database file %szPRAGMA encoding = "UTF-8";z"PRAGMA case_sensitive_like = true;zPRAGMA page_size = 4096;zPRAGMA cache_size = 20000;zPRAGMA temp_store = MEMORY;z$PRAGMA journal_size_limit = 1000000;zPRAGMA synchronous = NORMAL;z"Error while initializing database.z:CREATE TABLE IF NOT EXISTS main.ime (attr TEXT, val TEXT); Zsomebodyz%sz%Y%m%dzibus-table.svgZLGPLZabcdefghijklmnopqrstuvwxyz4Zspace1,2,3,4,5,6,7,8,9,0zPage_Up,minuszPage_Down,equaltrueZfalsezA IME under IBus Tableus0z{})"namez
name.zh_cnz
name.zh_hkz
name.zh_twZauthoruuidserial_numberZiconlicense	languageslanguage_filterZvalid_input_charsmax_key_lengthZcommit_keysselect_keysZpage_up_keysZpage_down_keysZstatus_promptZdef_full_width_punctZdef_full_width_letteruser_can_define_phraseZpinyin_modedynamic_adjustZauto_selectZauto_commitZdescriptionZlayoutZsymbolrulesleast_commit_lengthstart_charsorientationZalways_show_lookupZchar_promptsz9
            SELECT val FROM main.ime WHERE attr = :attr;zC
            INSERT INTO main.ime (attr, val) VALUES (:attr, :val);attrvalr   r   r+   r'   r-   zXCould not find "user_can_define_phrase" entry from database, is it an outdated database?Fr.   zPCould not find "dynamic_adjust" entry from database, is it an outdated database?Ztables.dbz.cache:memory:HOMEz.ibus/tablesz	debug.logzsetup-debug.logT)exist_okz.The user database %(udb)s does not exist yet.
Zudbidtabkeysphrasefreq	user_freqversionz/The user database %s seems to be incompatible.
z1There is no version information in the database.
0.0)old_database_versionz+The version of the database does not match z3(too old or too new?).
ibus-table wants version=%s
z*But the  database actually has version=%s
z6The number of columns of the database does not match.
zibus-table expects %s columns.
z*But the database actually has %s columns.
z&But the versions of the databases are zidentical.
zThis should never happen!
)strftimez-%Y-%m-%d_%H:%M:%Sz,Renaming the incompatible database to "%s".
z-shmz-walz$Creating a new, empty database "s".
zIf user phrases were successfully recovered from the old,
incompatible database, they will be used to initialize the new database.
zCompatible database %s found.
z"Connect to the database %(name)s.
r%   a  
                ATTACH DATABASE "%s" AS user_db;
                PRAGMA user_db.encoding = "UTF-8";
                PRAGMA user_db.case_sensitive_like = true;
                PRAGMA user_db.page_size = 4096;
                PRAGMA user_db.cache_size = 20000;
                PRAGMA user_db.temp_store = MEMORY;
                PRAGMA user_db.journal_mode = WAL;
                PRAGMA user_db.journal_size_limit = 1000000;
                PRAGMA user_db.synchronous = NORMAL;
            z Could not open the database %s.
z%Creating a new, empty database "%s".
user_dbr      r   r<   r=   r>   r?   z
            INSERT INTO user_db.phrases (tabkeys, phrase, freq, user_freq)
            VALUES (:tabkeys, :phrase, :freq, :user_freq)
            PRAGMA wal_checkpoint;)OintosgetenvDEBUG_LEVEL	TypeError
ValueErrorZold_phrasesfilename_user_dbreset_phrases_cachepathisfilesqlite3connectr   printr   r
   r   executescriptr&   Zuuid4timerC   _default_ime_attributessortedr	   r   ime_propertiesr   _mlen_snum
is_chinese_is_chineseis_cjkZ_is_cjkr-   boollowerr.   	get_rulesr/   get_possible_tabkeys_lengthsZpossible_tabkeys_lengthsget_start_charsZ
startcharsibus_table_locationjoinZ	data_homebasenamereplace
cache_pathload_phrases_cacheisdiraccessF_OKunlinkshutilZcopytreeZrmtreesymlinkmakedirsexistssysstderrwriteget_database_descDATABASE_VERSION%get_number_of_columns_of_phrase_tablelenextract_user_phrasesrenameinit_user_dbcreate_tablesappendexecutemanycommitcreate_indexesgenerate_userdb_desc)r   rN   rD   Zcreate_databaser
   select_sqlstrinsert_sqlstrr4   sqlargsre   Ztables_pathZ
cache_nameZold_tables_pathdescZphrase_table_column_namesrC   Z	timestampr=   r   r   r   r   r      sH   
)





 
 
 
 

    


 
 
	









zTabSqliteDb.__init__r   r   rD   Tc           	      C   s   t dkr,tjd||d d||d   |r4|s8dS d| }|||d}z*| j|| |rj| j  | | W n   d	dl}|	  Y nX dS )
zupdate phrase freqsr   z+update_phrase() tabkeys=%(t)s phrase=%(p)s tpzuser_freq=%(u)s database=%(d)s
)udNzu
        UPDATE %s.phrases SET user_freq = :user_freq
        WHERE tabkeys = :tabkeys AND phrase = :phrase
        ;)r?   r<   r=   r   )
rK   rs   rt   ru   r   r   r   invalidate_phrases_cacher
   r   )	r   r<   r=   r?   databaser   r   r   r
   r   r   r   update_phrase  s6    
zTabSqliteDb.update_phrasec                 C   s0   |    | jdkrdS | j  | jd dS )z1
        Trigger a checkpoint operation.
        NrG   )save_phrases_cacherO   r   r   r   r   r   r   r   
sync_usrdb  s
    

zTabSqliteDb.sync_usrdbc                 C   s   t dkrtjd i | _dS )z.
        Make the phrases cache empty
        r   zreset_phrases_cache()
N)rK   rs   rt   ru   _phrases_cacher   r   r   r   rP     s    zTabSqliteDb.reset_phrases_cachec                 C   sV   t dkrtjd td| jd D ],}| j|d| r$| j|d|  q$dS )u   
        Delete all phrases starting with “tabkeys” from
        the phrases cache.

        :param tabkeys: The keys typed
        :type tabkeys: String
        r   zinvalidate_phrases_cache()
r   N)	rK   rs   rt   ru   ranger[   r   r   pop)r   r<   ir   r   r   r     s
    z$TabSqliteDb.invalidate_phrases_cachec                 C   s   t dkrtjd z@ddl}|t| jd| _| j	d}|rL|| j
krRi | _W nv tk
r   t dkr~tjd| j  Y nJ tk
r   t dkrtjd| j  Y n   tjd	| j  Y nX dS )
z.
        Load phrases cache from disk
        r   zload_phrases_cache()
r   Nrr'   z'load_phrases_cache() File %s not found
z1load_phrases_cache() Permission error reading %s
z.load_phrases_cache() Unknown error reading %s
)rK   rs   rt   ru   jsonloadopenri   r   r   r\   FileNotFoundErrorPermissionError)r   r   Zsnumr   r   r   rj     s4    
zTabSqliteDb.load_phrases_cachec                 C   sz   t dkrtjd zDddl}| j| jd< | jd }|| jt	|d t
|| j W n   ddl}|  Y nX dS )z.
        Save phrases cache from disk
        r   zsave_phrases_cache()
r   Nr'   z.tmpw)rK   rs   rt   ru   r   r\   r   ri   dumpr   rI   rh   r
   r   )r   r   Z_cache_pathr
   r   r   r   r     s    
zTabSqliteDb.save_phrases_cachec                 C   s@   | j d}|r<|d}|D ]}| ddkr dS qdS )zg
        Check whether this input method is classified as Chinese
        in the the database.
        r)   ,zhTF)rZ   r   splitra   find)r   r)   Zlangslangr   r   r   r]   $  s    
zTabSqliteDb.is_chinesec                 C   sH   | j d}|rD|d}|D ]$}dD ]}| |r&  dS q&qdS )zx
        Check whether this input method is classified as Chinese,
        Japanese, or Korean in the database.
        r)   r   )r   ZjaZkoTF)rZ   r   r   strip
startswith)r   r)   Zlanguager   r   r   r   r_   1  s    
zTabSqliteDb.is_cjkc                 C   s$   | j d}|dkr t|d S dS )a  
        Get the default Chinese mode from the database

        0 means to show simplified Chinese only
        1 means to show traditional Chinese only
        2 means to show all characters but show simplified Chinese first
        3 means to show all characters but show traditional Chinese first
        4 means to show all characters

        If no mode is specified in the database, return 4 to avoid all
        filtering of characters.

        :rtype: Integer
        r*   )Zcm0Zcm1Zcm2Zcm3Zcm4r      )rZ   r   rH   )r   r*   r   r   r   get_chinese_mode?  s    zTabSqliteDb.get_chinese_modec                 C   s   | j d}|r|S dS )zc
        Get the keys used to select a candidate from the database

        :rtype: String
        r,   r!   rZ   r   )r   Zretr   r   r   get_select_keysS  s    zTabSqliteDb.get_select_keysc              	   C   s2   zt | jdW S  ttfk
r,   Y dS X dS )zl
        Get the default orientation of the lookup table from the database

        :rtype: Integer
        r2   r   N)rH   rZ   r   rL   rM   r   r   r   r   get_orientation^  s    zTabSqliteDb.get_orientationc                 C   sR   |dkr0d| }| j | d| }| j | d| }| j | | j   dS )z%Create tables that contain all phrasemainzp
            CREATE TABLE IF NOT EXISTS %s.goucima
            (zi TEXT PRIMARY KEY, goucima TEXT);
            zp
            CREATE TABLE IF NOT EXISTS %s.pinyin
            (pinyin TEXT, zi TEXT, freq INTEGER);
            z
        CREATE TABLE IF NOT EXISTS %s.phrases
        (id INTEGER PRIMARY KEY, tabkeys TEXT, phrase TEXT,
        freq INTEGER, user_freq INTEGER);
        N)r   r   r   )r   r   r   r   r   r   r}   i  s    zTabSqliteDb.create_tablesc                 C   s   d}d}d}|D ]B\}}||d}| j || rD| j || q| j || q| j   t| j | jd| _t| jd| _	| 
 | _| jd| _| jrt| j dk| _ntd	 d
| _|  | _dS )zUpdate or insert attributes in ime table, attrs is a iterable object
        Like [(attr,val), (attr,val), ...]

        This is called only by tabcreatedb.py.
        z+SELECT val from main.ime WHERE attr = :attrz2UPDATE main.ime SET val = :val WHERE attr = :attr;z6INSERT INTO main.ime (attr, val) VALUES (:attr, :val);r3   r6   r+   r-   r"   zWCould not find "user_can_define_phrase" entry from database, is it a outdated database?FN)r   r   r	   r   r   rX   rZ   rH   r   r[   r]   r^   r-   r`   ra   rU   rb   r/   )r   Zattrsr   Zupdate_sqlstrr   r4   r5   r   r   r   r   
update_ime  s6    


zTabSqliteDb.update_imec              	   C   s2  i }t d}t d}| js"dS z| jd}|rB| d}|D ]}||}|rg }|ddkr|t	|d|d	< |d
d}t
|| jkrtd|   q|D ]0}	||	}
|t	|
dt	|
df q||t	|d< qFtd|  qFW n& tk
r,   ddl}|  Y nX |S )zGet phrase construct ruleszc([ea])(\d):(.*)zp(-{0,1}\d)(-{0,1}\d)r   r/   ;r   arE   abover   +zrule: "%s" over max key lengthznot a legal rule: "%s"r   N)recompiler-   rZ   r   r   r   matchgrouprH   ry   r[   rU   r~   	Exceptionr
   r   )r   r/   Zpatt_rZpatt_pZ_rulesruleresZcmsZ_cmsZ_cmZcm_resr
   r   r   r   rb     s<    



zTabSqliteDb.get_rulesc              	      s    j r4 j d } fddtd|d D dd S zt jd}W n ttfk
rd   d}Y nX |dkrtt| jd S g S dS )	u  Return a list of the possible lengths for tabkeys in this table.

        Example:

        If the table source has rules like:

            RULES = ce2:p11+p12+p21+p22;ce3:p11+p21+p22+p31;ca4:p11+p21+p31+p41

        self._rules will be set to

            self._rules = {
                2: [(1, 1), (1, 2), (2, 1), (2, 2)],
                3: [(1, 1), (1, 2), (2, 1), (3, 1)],
                4: [(1, 1), (2, 1), (3, 1), (-1, 1)],
                'above': 4}

        and then this function returns “[4, 4, 4]”

        Or, if the table source has no RULES but LEAST_COMMIT_LENGTH=2
        and MAX_KEY_LENGTH = 4, then it returns “[2, 3, 4]”

        I cannot find any tables which use LEAST_COMMIT_LENGTH though.
        r   c                    s   g | ]}t  j| qS r   )ry   r/   .0xr   r   r   
<listcomp>  s     z<TabSqliteDb.get_possible_tabkeys_lengths.<locals>.<listcomp>rE   r   Nr0   r   )	r/   r   rH   rZ   r   rL   rM   listr[   )r   Zmax_lenZleast_commit_lenr   r   r   rc     s    
$

z(TabSqliteDb.get_possible_tabkeys_lengthsc                 C   s   | j dS )z"return possible start chars of IMEr1   r   r   r   r   r   rd     s    zTabSqliteDb.get_start_charsc                 C   s*   | j d}t|tdkr&|d}|S )z6Get the characters which engine should not change freqZno_check_charsr   r   )rZ   r   typedecode)r   Z_charsr   r   r   get_no_check_chars  s    
zTabSqliteDb.get_no_check_charsr   c           	      C   s   t dkrtjdt|  dd|i }g }|D ]*\}}}}|||||d | | q0| j|| | j	  | j
d dS )u  Add many phrases to database fast. Used by tabcreatedb.py when
        creating the system database from scratch.

        “phrases” is a iterable object which looks like:

            [(tabkeys, phrase, freq ,user_freq),
             (tabkeys, phrase, freq, user_freq), ...]

        This function does not check whether phrases are already
        there.  As this function is only used while creating the
        system database, it is not really necessary to check whether
        phrases are already there because the database is initially
        empty anyway. And the caller should take care that the
        “phrases” argument does not contain duplicates.

        r   zadd_phrases() len(phrases)=%s

        INSERT INTO %(database)s.phrases
        (tabkeys, phrase, freq, user_freq)
        VALUES (:tabkeys, :phrase, :freq, :user_freq);
        r   rF   rG   N)rK   rs   rt   ru   ry   r~   r   r   r   r   r   )	r   phrasesr   r   insert_sqlargsr<   r=   r>   r?   r   r   r   add_phrases  s&    
zTabSqliteDb.add_phrasesc                 C   s  t dkr,tjd||d d||d   |r4|s8dS dd|i }||d	}| j|| }	|	rt dkrtjd
d||d  dd|	i   dS dd|i }
||||d}t dkrtjd|
|d  z*| j|
| |r| j  | | W n   ddl	}|
  Y nX dS )zaAdd phrase to database, phrase is a object of
        (tabkeys, phrase, freq ,user_freq)
        r   z&add_phrase tabkeys=%(t)s phrase=%(p)s r   zfreq=%(f)s user_freq=%(u)s
)fr   Nzk
        SELECT * FROM %(database)s.phrases
        WHERE tabkeys = :tabkeys AND phrase = :phrase;
        r   r<   r=   zadd_phrase() z-select_sqlstr=%(sql)s select_sqlargs=%(arg)s )Zsqlargzalready there!: results=%(r)s 
r   r   rF   z:add_phrase() insert_sqlstr=%(sql)s insert_sqlargs=%(arg)s
r   )rK   rs   rt   ru   r   r   r	   r   r   r
   r   )r   r<   r=   r>   r?   r   r   r   Zselect_sqlargsr   r   r   r
   r   r   r   
add_phrase  sj    

zTabSqliteDb.add_phrasec                 C   sz   d}g }|D ]\}}| ||d qz2| j  | j|| | j  | jd W n   ddl}|  Y nX dS )zyAdd goucima into database, goucimas is iterable object
        Like goucimas = [(zi,goucima), (zi,goucima), ...]
        zP
        INSERT INTO main.goucima (zi, goucima) VALUES (:zi, :goucima);
        )zigoucimarG   r   N)r~   r   r   r   r   r
   r   )r   Zgoucimasr   r   r   r   r
   r   r   r   add_goucimaO  s    

zTabSqliteDb.add_goucimac           	      C   s   d| }d}|D ]\}}}|d7 }| dd dd dd	 d
d dd}z| j||||d W q tk
r   tjdd||||d   ddl}|  Y qX q| j	  dS )zxAdd pinyin to database, pinyins is a iterable object
        Like: [(zi,pinyin, freq), (zi, pinyin, freq), ...]
        zX
        INSERT INTO %s.pinyin (pinyin, zi, freq) VALUES (:pinyin, :zi, :freq);
        r   r   1!2@3#r    $5%)pinyinr   r>   z(Error when inserting into pinyin table. z-count=%(c)s pinyin=%(p)s zi=%(z)s freq=%(f)s
)cr   zr   N)
rh   r   r   r   rs   rt   ru   r
   r   r   )	r   Zpinyinsr   r   countr   r   r>   r
   r   r   r   
add_pinyinb  sL          
zTabSqliteDb.add_pinyinc                 C   s*   d}| j | | j d | j   dS )ze
        Optimize the database by copying the contents
        to temporary tables and back.
        a  
            CREATE TABLE tmp AS SELECT * FROM main.phrases;
            DELETE FROM main.phrases;
            INSERT INTO main.phrases SELECT * FROM tmp ORDER BY
            tabkeys ASC, phrase ASC, user_freq DESC, freq DESC, id ASC;
            DROP TABLE tmp;
            CREATE TABLE tmp AS SELECT * FROM main.goucima;
            DELETE FROM main.goucima;
            INSERT INTO main.goucima SELECT * FROM tmp ORDER BY zi, goucima;
            DROP TABLE tmp;
            CREATE TABLE tmp AS SELECT * FROM main.pinyin;
            DELETE FROM main.pinyin;
            INSERT INTO main.pinyin SELECT * FROM tmp ORDER BY pinyin ASC, freq DESC;
            DROP TABLE tmp;
            zVACUUM;N)r   rV   r   )r   r   r   r   r   optimize_database~  s    zTabSqliteDb.optimize_databasec                 C   s   t dkrtjd dS )zDrop the indexes in the database to reduce its size

        We do not use any indexes at the moment, therefore this
        function does nothing.
        r   zdrop_indexes()
NrK   rs   rt   ru   )r   	_databaser   r   r   drop_indexes  s    zTabSqliteDb.drop_indexesc                 C   s   t dkrtjd dS )u  Create indexes for the database.

        We do not use any indexes at the moment, therefore
        this function does nothing. We used indexes before,
        but benchmarking showed that none of them was really
        speeding anything up, therefore we deleted all of them
        to get much smaller databases (about half the size).

        If some index turns out to be very useful in future, it could
        be created here (and dropped in “drop_indexes()”).
        r   zcreate_indexes()
Nr   )r   r   Z_commitr   r   r   r     s    zTabSqliteDb.create_indexesc                 C   s,   z| d}W n tk
r&   d}Y nX |S )z
        Encode a string in Big5 or, if that is not possible,
        return something higher than any Big5 code.

        :param phrase: String to be encoded in Big5 encoding
        :type phrase: String
        :rtype: Byte array
        ZBig5s   )encodeUnicodeEncodeError)r   r=   big5r   r   r   	big5_code  s
    	
zTabSqliteDb.big5_coder   r   c                    s   d}t j| jdd}|dkr*| jndd |dkrr| jrr|dkrNd	 nd t| fd
ddd| S t|fdddd| S )u   
        “candidates” is an array containing something like:
        [(tabkeys, phrase, freq, user_freq), ...]

        “typed_tabkeys” is key sequence the user really typed, which
        maybe only the beginning part of the “tabkeys” in a matched
        candidate.
        d   r7   r   )Zcangjie3Zcangjie5zcangjie-bigzquick-classicZquick3Zquick5c                 S   s   dS )Nr   r   r   r   r   r   <lambda>      z-TabSqliteDb.best_candidates.<locals>.<lambda>)rE   r   rE   r   c              
      sf   t | d k d| d   t| d @  d| d  t| d | d | d d t| d d fS )Nr   r   r   r   rE   )rH   chinese_variantsdetect_chinese_categoryry   ordr   bitmaskcode_point_functiontyped_tabkeysr   r   r     s    



r   Nc              	      sT   t | d k d| d  d| d  t| d | d  | d d t| d d fS )Nr   r   r   rE   r   )rH   ry   r   r   )r   r   r   r   r     s    



)rI   rQ   rg   rN   rh   r   r^   rY   )r   r   
candidateschinese_modeZmaximum_number_of_candidatesZengine_namer   r   r   best_candidates  s&    
zTabSqliteDb.best_candidatesc              
   C   s.  |sg S | j |}|r|S d}|r(d}| js4| jrBdd|i }	ndd|i }	d}
dD ]}|||fkrV|}
qV|}||
|
|
 }d||fkr|d|
d }d	||fkr|d	|
d	 }|r||d	}|r||d
}|r|d
7 }||
d}tdkrtjd|	t	|f  | j
|	| }d}|dkr6d}n|dkrDd}|sP|}n.g }|D ]$}|t|d @ rX|| qXi }|D ]f}|d |d f}||kr|||< n<|||t|d || d t|d || d f fg q| j|| |d}tdkr tjdt	|  || j |< |S )zE
        Get matching phrases for tabkeys from the database.
        r   z AND length(phrase)=1 a  
            SELECT tabkeys, phrase, freq, user_freq FROM
            (
                SELECT tabkeys, phrase, freq, user_freq FROM main.phrases
                WHERE tabkeys LIKE :tabkeys ESCAPE :escapechar %(one_char_condition)s
                UNION ALL
                SELECT tabkeys, phrase, freq, user_freq FROM user_db.phrases
                WHERE tabkeys LIKE :tabkeys ESCAPE :escapechar %(one_char_condition)s
            )
            one_char_conditionz
            SELECT tabkeys, phrase, freq, user_freq FROM main.phrases
            WHERE tabkeys LIKE :tabkeys ESCAPE :escapechar %(one_char_condition)s
            u   ☺z!@#r   _%%)r<   
escapecharr   z$select_words() sqlstr=%s sqlargs=%s
Nr   rE   r   r   r   r   zselect_words() best=%s
)r   r   r-   r.   rh   rK   rs   rt   ru   r   r   r   r	   r   r   r~   updatemaxr   values)r   r<   Zonecharr   single_wildcard_charmulti_wildcard_charZauto_wildcardZbestr   r   r   chartabkeys_for_liker   Zunfiltered_resultsr   r   r   phrase_frequenciesr   r   r   r   select_words  s    	   








zTabSqliteDb.select_wordsc                 C   s   |sg S d}|}|r | |d}|r0| |d}|d7 }d|i}| j|| }d}	|dkrdd}	n|dkrpd}	g }
|D ]J\}}}|	s|
t|||dg qx|	t|@ rx|
t|||dg qx| j||
|d	S )
zh
        Get Chinese characters matching the pinyin given by tabkeys
        from the database.
        z}
        SELECT pinyin, zi, freq FROM main.pinyin WHERE pinyin LIKE :tabkeys
        ORDER BY freq DESC, pinyin ASC
        ;r   r   r<   Nr   r   rE   r   )	rh   r   r   r	   r~   tupler   r   r   )r   r<   r   r   r   r   r   r   r   r   r   r   r   r>   r   r   r   #select_chinese_characters_by_pinyinY  sB      z/TabSqliteDb.select_chinese_characters_by_pinyinc                 C   sh   zFd}| j | d}| j |dtf d}| j |d | j   W n   ddl}|  Y nX dS )z{
        Add a description table to the user database

        This adds the database version and  the create time
        zBCREATE TABLE IF NOT EXISTS user_db.desc (name PRIMARY KEY, value);z2INSERT OR IGNORE INTO user_db.desc  VALUES (?, ?);r@   zMINSERT OR IGNORE INTO user_db.desc  VALUES (?, DATETIME("now", "localtime"));)zcreate-timer   N)r   rV   r   rw   r   r
   r   )r   Z	sqlstringr
   r   r   r   r     s    z TabSqliteDb.generate_userdb_descc                 C   s6   |dkrdS t |s2t|}|d |  dS )z
        Initialize the user database unless it is an in-memory database

        :param db_file: Full path of the database file.
        :type db_file: String
        r8   Nax  
                PRAGMA encoding = "UTF-8";
                PRAGMA case_sensitive_like = true;
                PRAGMA page_size = 4096;
                PRAGMA cache_size = 20000;
                PRAGMA temp_store = MEMORY;
                PRAGMA journal_mode = WAL;
                PRAGMA journal_size_limit = 1000000;
                PRAGMA synchronous = NORMAL;
            )rQ   rr   rS   rT   rV   r   )r   db_filer   r   r   r   r|     s    



zTabSqliteDb.init_user_dbc                 C   sb   t |sdS z@t|}i }|d D ]}|d ||d < q,|  |W S    Y dS X dS )z
        Get the description table from the database

        :param db_file: Full path of the database file.
        :type db_file: String
        :rtype: Dictionary
        NzSELECT * FROM desc;r   r   )rQ   rr   rS   rT   r   r	   close)r   r   r   r   rowr   r   r   rv     s    

zTabSqliteDb.get_database_descc                 C   s   t |sdS zbt|}|d }d|d d  }t	d|}|rh|
dd}t|W S W dS W n   Y dS X dS )a1  
        Get the number of columns in the 'phrases' table in
        the database in db_file.

        Determines the number of columns by parsing this:

        sqlite> select sql from sqlite_master where name='phrases';
        CREATE TABLE phrases
                (id INTEGER PRIMARY KEY, tabkeys TEXT, phrase TEXT,
                freq INTEGER, user_freq INTEGER)
        sqlite>

        This result could be on a single line, as above, or on multiple
        lines.

        :param db_file: Full path of the database file.
        :rtype: Integer
        r   z3select sql from sqlite_master where name='phrases'; z
.*\((.*)\)r   r   N)rQ   rr   rS   rT   r   r	   rf   
splitlinesr   r   r   r   ry   )r   r   r   Ztp_resstringr   tpr   r   r   rx     s    



z1TabSqliteDb.get_number_of_columns_of_phrase_tablec                 C   sT   |sdS d}| j |d|i }|r4|d d }nd}tdkrPtjd|  |S )zGet goucima of given characterr   z0SELECT goucima FROM main.goucima WHERE zi = :zi;r   r   r   zget_goucima() goucima=%s
)r   r   r	   rK   rs   rt   ru   )r   r   r   r   r   r   r   r   get_goucima  s    zTabSqliteDb.get_goucimac                 C   sh  t dkr tjd|| jd  t|tdkr:|d}|sBdS t|dkrX| |S | jsbdS t|| jkr| jt| }n@t|| jd kr| j| jd  }ntjd|| jd  dS t|| j	krtjdd	|| j	d
   dS d}|D ]R\}}|dkr|d8 }|dkr"|d8 }| || | }|s@ dS ||7 }qt dkrdtjd|  |S )u'  Parse phrase to get its table code

        Example:

        Let’s assume we use wubi-jidian86. The rules in the source of
        that table are:

          RULES = ce2:p11+p12+p21+p22;ce3:p11+p21+p31+p32;ca4:p11+p21+p31+p-11

        “ce2” is a rule for phrases of length 2, “ce3” is a rule
        for phrases of length 3, “ca4” is a rule for phrases of
        length 4 *and* for all phrases with a length greater then
        4. “pnm” in such a rule means to use the n-th character of
        the phrase and take the m-th character of the table code of
        that character. I.e. “p-11” is the first character of the
        table code of the last character in the phrase.

        Let’s assume the phrase is “天下大事”. The goucima (構詞碼
        = “word formation keys”) for these 4 characters are:

            character goucima
            天        gdi
            下        ghi
            大        dddd
            事        gkvh

        (If no special goucima are defined by the user, the longest
        encoding for a single character in a table is the goucima for
        that character).

        The length of the phrase “天下大事” is 4 characters,
        therefore the rule ca4:p11+p21+p31+p-11 applies, i.e. the
        table code for “天下大事” is calculated by using the first,
        second, third and last character of the phrase and taking the
        first character of the goucima for each of these. Therefore,
        the table code for “天下大事” is “ggdg”.

        r   z'parse_phrase() phrase=%(p)s rules%(r)s
)r   r   r   zUTF-8r   z9No rule for this phrase length. phrase=%(p)s rules=%(r)s
z!Rule exceeds maximum key length. zrule=%(r)s self._mlen=%(m)s
)r   mr   zparse_phrase() tabkeys=%s
)
rK   rs   rt   ru   r/   r   r   ry   r  r[   )r   r=   r   r<   r   ZmaZtabkeyr   r   r   parse_phrase  s\    '








zTabSqliteDb.parse_phrasec                 C   s   t dkrtjd||d  |r&|s*dS d}|d |d}| j|| }t dkrxtjd||d d	d
|i   t|S )u   
        Checks whether “phrase” can be matched in the system database
        with a key sequence *starting* with “tabkeys”.
        r   z3is_in_system_database() tabkeys=%(t)s phrase=%(p)s
r   Fzf
        SELECT * FROM main.phrases
        WHERE tabkeys LIKE :tabkeys AND phrase = :phrase;
        r   r   z3is_in_system_database() tabkeys=%(t)s phrase=%(p)s zresults=%(r)s
r   )rK   rs   rt   ru   r   r   r	   r`   )r   r<   r=   r   r   r   r   r   r   is_in_system_databaseN  s,    z!TabSqliteDb.is_in_system_databasec                 C   sv   t dkrtjd||d  |r&|s*dS d}||d}| j|| }t dkrbtjd|  |rr|d d S dS )uV  
        Return how often a conversion result “phrase” for the typed keys
        “tabkeys” has been happened by checking the user database.

        :param tabkeys: The keys typed
        :type tabkeys: String
        :param phrase: A conversion result for these tabkeys
        :type phrase: String
        :rtype: Integer

        r   z,user_frequency() tabkeys=%(t)s phrase=%(p)s
r   r   z
        SELECT sum(user_freq) FROM user_db.phrases
        WHERE tabkeys = :tabkeys AND phrase = :phrase GROUP BY tabkeys, phrase;
        r   zuser_frequency() result=%s
)rK   rs   rt   ru   r   r   r	   )r   r<   r=   r   r   r   r   r   r   user_frequencyg  s     
zTabSqliteDb.user_frequencyc                 C   s  t dkrtjd||d  t|tdkr8|d}t|tdkrR|d}|rZ|s^dS | jrp|tkrpdS | js| j	r| j
sdS | |}|sdS | j||drdS | j||ddkrdS | j||d	dd
d n| j||dr*| j||d}|dkr| j|||d d n| j||ddd
d nj| j	r:| j
s>dS | |}|sRdS | j||d}|dkr| j|||d d n| j||d	dd
d dS )a  Adjust user_freq in user database if necessary.

        Also, if the phrase is not in the system database, and it is a
        Chinese table, and defining user phrases is allowed, add it as
        a user defined phrase to the user database if it is not yet
        there.
        r   z2check_phrase_internal() tabkey=%(t)s phrase=%(p)s
r   r   utf8Nr   r   r   rD   )r<   r=   r>   r?   r   )r<   r=   r?   )rK   rs   rt   ru   r   r   r^   CHINESE_NOCHECK_CHARSr.   r-   r]   r  r	  r
  r   r   )r   r<   r=   r?   r   r   r   check_phrase  s    


   
     

     zTabSqliteDb.check_phrasec                 C   sJ   t |t dkr|d}d}d|i}| j|| }dd |D }|S )uu  
        Return the list of possible tabkeys for a phrase.

        For example, if “phrase” is “你” and the table is wubi-jidian.86.txt,
        the result will be ['wq', 'wqi', 'wqiy'] because that table
        contains the following 3 lines matching that phrase exactly:

        wq	你	597727619
        wqi	你	1490000000
        wqiy	你	1490000000
        r   r  zo
        SELECT tabkeys FROM main.phrases WHERE phrase = :phrase
        ORDER by length(tabkeys) ASC;
        r=   c                 S   s   g | ]}|d  qS )r   r   r   r   r   r   r     s     z,TabSqliteDb.find_zi_code.<locals>.<listcomp>)r   r   r   r   r	   )r   r=   r   r   r   Zlist_of_possible_tabkeysr   r   r   find_zi_code  s    
zTabSqliteDb.find_zi_codec                 C   sZ   |sdS |rdd|i }ndd|i }||d}| j || |rL| j   | | dS )z$Remove phrase from database
        Nzu
            DELETE FROM %(database)s.phrases
            WHERE tabkeys = :tabkeys AND phrase = :phrase;
            r   z^
            DELETE FROM %(database)s.phrases
            WHERE phrase = :phrase;
            r   )r   r   r   r   )r   r<   r=   r   r   Zdelete_sqlstrZdelete_sqlargsr   r   r   remove_phrase  s    

zTabSqliteDb.remove_phraserA   c                 C   sl  t jd z8t|}|d |dkrt|d }|  t|dd d}t jdt	|  |d	d	 W S g }|d
 }|D ]z}d}d|d i}| j
|| }	|	r||	d d |d d|d f q| |d }
|
r||
|d d|d f q|  t|dd d}t jddt	|   |d	d	 W S W n    dd	l}|  g  Y S X d	S )z"extract user phrases from databasezCTrying to recover the phrases from the old, incompatible database.
rG   r   z
                    SELECT tabkeys, phrase, freq, sum(user_freq) FROM phrases
                    GROUP BY tabkeys, phrase, freq;
                    c                 S   s   | d | d | d | d fS Nr   r   rE   r   r   r   r   r   r   r     r   z2TabSqliteDb.extract_user_phrases.<locals>.<lambda>r   z4Recovered phrases from the old database: phrases=%s
Nz;SELECT phrase, sum(user_freq) FROM phrases GROUP BY phrase;z
                    SELECT tabkeys FROM main.phrases WHERE phrase = :phrase
                    ORDER BY length(tabkeys) DESC;
                    r=   r   r   r   c                 S   s   | d | d | d | d fS r  r   r   r   r   r   r   ,  r   z.Recovered phrases from the very old database: zphrases=%s
)rs   rt   ru   rS   rT   r   r	   r   rY   r   r   r~   r  r
   r   )r   Zdatabase_filerB   r   r   r   r   r   r   Ztabkeys_resultsr<   r
   r   r   r   rz     sp    

   z TabSqliteDb.extract_user_phrases)NNF)r   r   r   rD   T)r   )r   )r   r   r   r   r   T)r   )T)r   r   r   )r   Fr   r   r   F)r   r   r   r   )r   r   )r   r   )r   r   )r   r   rD   T)r   rA   )-r   r   r   r   r   r   r   rP   r   rj   r   r]   r_   r   r   r   r}   r   rb   rc   rd   r   r   r   r   r   r   r   r   r   r   r   r   r   r|   rv   rx   r  r  r	  r
  r  r  r  rz   r   r   r   r   r   r   s        
  2        


#!%
&          
7


     
8         
d      
*&U


A       
   r   )r   rI   os.pathrQ   rs   ro   rS   r&   rW   r   r   version_inforeloadZsetdefaultencodingrH   rK   rw   r  r   r   r   r   r   r   <module>   s"   

 &