U
    cc5%                     @   s2  d Z ddlmZ ddlZddlZddlZddlZddlZddl	m
Z
mZ ddlmZ ejdZedd Zedd	 Zeejd
ddgejddddgdd Zedd Zedd Zeejddi e
ddgifdddie
ddgifdddgie
dd d!gifd"dgd#d$e
ddgifd"dgd%d$e
dejdgifgd&d' Zed(d) Zeejd*d+d,d-gd.d/ Zeejd0d#d%gd1d2 Zed3d4 Zejdd5dd6d7d8gd9d: Zeejdd5ddd;d<gd=d> Zed?d@ Z edAdB Z!ejdCejdDdEdFgdGdH Z"dS )IzZ
Tests encoding functionality during parsing
for all of the parsers defined in parsers.py
    )BytesION)	DataFrameread_csvZpyarrow_skipc                 C   sL   d}| }t d|}|j|d|d}tddggddgd	}t|| d S )
Ncp1255u   שלום:1234
562:123:)sepencodingi2  {   u   שלום1234columnsr   encoder   r   tmassert_frame_equal)all_parsersr   parserdataresultexpected r   ^/var/www/html/project/venv/lib/python3.8/site-packages/pandas/tests/io/parser/test_encoding.pytest_bytes_io_input   s    r   c                 C   s@   | }t d }|j|ddd d}tddgg}t|| d S )Nu   Łaski, Jan;1;utf-8)r   r   headeru   Łaski, Jan   r   )r   r   r   r   r   r   r   r   test_read_csv_unicode"   s
    r   r   ,	r   utf-16zutf-16lezutf-16bec              
   C   s   | }d d|}dt  d}|dd}d}t|}dd	lm} ||}	t|d
}
|
	|	 W 5 Q R X t
||}|||d}|j|fd|i|}|j|fd|i|}|  t|| W 5 Q R X d S )Nz)skip this
skip this too
A,B,C
1,2,3
4,5,6r   __z__.csv   )r   Zskiprowsr   r   )TextIOWrapperwbr   r   )replaceuuiduuid4r   ensure_cleanior#   r   openwriter   r   closer   )r   r   r   r   r   pathkwargsutf8r#   
bytes_datafZbytes_bufferr   r   r   r   r   test_utf16_bom_skiprows,   s&     

r3   c                 C   s6   t j|d}| }|j|ddd}t|dks2td S )Nzutf16_ex.txtr    r   )r   r   2   )osr.   joinr   lenAssertionError)r   csv_dir_pathr.   r   r   r   r   r   test_utf16_exampleO   s    r:   c                 C   sL   t j|d}| }|j|d dd}|d}|d d }d}||ksHtd S )Nunicode_series.csvlatin-1)r   r   r   r   i`  u$   Á köldum klaka (Cold Fever) (1994))r5   r.   r6   r   Z	set_indexr8   )r   r9   r.   r   r   gotr   r   r   r   test_unicode_encodingW   s    
r>   zdata,kwargs,expectedza
1ar   z"a"
1	quotechar"zb
1namesb1z
1T)rB   Zskip_blank_linesFc                    sD   | }d d fdd}|j ||fdi|}t|| d S )Nu   ﻿r   c                    s    |   }t|S )N)r   r   )_dataZbom_databomr0   r   r   _encode_data_with_bom~   s    z,test_utf8_bom.<locals>._encode_data_with_bomr   )r   r   r   )r   r   r/   r   r   rH   r   r   rF   r   test_utf8_bomd   s    rI   c                 C   sL   t dgdgd}| }||}d|}|jt||d}t|| d S )Ng333333@test)Zmb_numZ	multibytezmb_num,multibyte
4.8,testr%   )r   formatr   r   r   r   r   )r   	utf_valueencoding_fmtr   r   r   r   r   r   r   r   test_read_csv_utf_aliases   s    

rN   zfile_path,encoding))r*   r   csvz	test1.csvr   ))r*   r   r   r;   r<   ))r*   r   r   zsauron.SHIFT_JIS.csvshiftjisc           
   	   C   s   | }|| }|j ||d}t||d}| |}|jr<tW 5 Q R X t|| t|dd}	|j |	|d}|	jrxtW 5 Q R X t|| t|ddd}	|j |	|d}|	jrtW 5 Q R X t|| d S )Nr%   rbmoder   )rS   	buffering)r   r+   closedr8   r   r   )
r   	file_pathr   datapathr   Zfpathr   far   Zfbr   r   r   test_binary_mode_file_buffers   s    
rY   pass_encodingc           	   	   C   sr   | }| |}tddgi}tjd|dd<}|d |d |j||rP|nd d}t|| W 5 Q R X d S )	NZfoobarzw+T)rS   r   Zreturn_filelikezfoo
barr   r%   )rK   r   r   r)   r,   seekr   r   )	r   rL   rM   rZ   r   r   r   r2   r   r   r   r   test_encoding_temp_file   s    


r]   c              	   C   s~   | }d}d}d}t ||gi}t N}|| d| | |d |j||d}t|| |j	rpt
W 5 Q R X d S )Nz	shift-jisu	   てすとu   こむ
r   r%   )r   tempfileNamedTemporaryFiler,   r   r\   r   r   r   rU   r8   )r   r   r   titler   r   r2   r   r   r   r   test_encoding_named_temp_file   s    

rb   r   z	utf-16-bez	utf-16-lezutf-32c                 C   sR   d}t || }t|d| d}tddgddgdd	ggd
dgd}t|| d S )Nu   a	b
：foo	0
bar	1
baz	2r   )	delimiterr   u   ：foor   r[   r   Zbazr"   r?   rC   )r   r   r   )r   r   Zencoded_datar   r   r   r   r   %test_parse_encoded_special_characters   s
    "rd   r   r<   c              	   C   sp   | }t ddddgddddgd	d
ddgd}t &}|j|d|d |j||dd}W 5 Q R X t|| d S )NZRaphaelZ	DonatellozMiguel AngelZLeonardoredpurpleZorangeblueZsaizbo staffZnunchunkZkatana)namemaskZweaponF)indexr   T)r   
memory_map)r   r   r)   to_csvr   r   )r   r   r   r   filedfr   r   r   test_encoding_memory_map   s    



ro   c              	   C   sh   | }t dgd d}d|jd< td*}|j|dddd	 |j|d
ddd}W 5 Q R X t|| d
S )zO
    Chunk splits a multibyte character with memory_map=True

    GH 43540
    Zaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaai   )r   u   aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaąi  zbug-gh43540.csvFr   rj   r   r   NTc)r   rk   engine)r   Zilocr   r)   rl   r   r   )r   r   rn   fnamedfrr   r   r    test_chunk_splits_multibyte_char   s    
ru   c              	   C   s   g }d}d}d}t t|t||D ]X}ddd t ||d D d }z|d W n tk
rp   Y q$Y nX || q$| }t|}td	,}	|j	|	d
d
dd |j
|	ddddd}
W 5 Q R X t||
 dS )zg
    GH 43787

    Test correct handling of UTF-8 chars when memory_map=True and encoding is UTF-8
        u   𐂀 c                 S   s   g | ]}t |qS r   )chr).0rq   r   r   r   
<listcomp>  s     z,test_readcsv_memmap_utf8.<locals>.<listcomp>r^   r   zutf8test.csvFrp   NTrq   )r   rk   rr   r   )rangeordr6   r   UnicodeEncodeErrorappendr   r   r)   rl   r   r   )r   linesline_lengthZ
start_charZend_charlnumliner   rn   rs   rt   r   r   r   test_readcsv_memmap_utf8  s.    "
    r   Zpyarrow_xfailrS   zw+bzw+tc              	   C   sh   | }d}d|krd}t j|d$}|| |d ||}W 5 Q R X tg dgd}t|| d S )Ns   abcdtabcdrR   r   r   )r_   SpooledTemporaryFiler,   r\   r   r   r   r   )r   rS   r   contenthandlern   r   r   r   r   test_not_readable,  s    

r   )#__doc__r*   r   r5   r_   r'   numpynpZpytestZpandasr   r   Zpandas._testingZ_testingr   markZusefixturesZskip_pyarrowr   r   Zparametrizer3   r:   r>   nanrI   rN   rY   r]   rb   rd   ro   ru   r   r   r   r   r   r   <module>   s   

	 




 



