8th day of python challenges 111-117

abd.shallal
2019-08-04 15:26:35 +03:00
parent b04c1b055f
commit 627802c383
3215 changed files with 760227 additions and 491 deletions

View File (pandas/io/sas/__init__.py)

@@ -0,0 +1 @@
from .sasreader import read_sas # noqa
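# Re-exported here so that the package-level entry point (pandas.read_sas)
# resolves to pandas.io.sas.sasreader.read_sas.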

View File (pandas/io/sas/sas7bdat.py)

@@ -0,0 +1,732 @@
"""
Read SAS7BDAT files
Based on code written by Jared Hobbs:
https://bitbucket.org/jaredhobbs/sas7bdat
See also:
https://github.com/BioStatMatt/sas7bdat
Partial documentation of the file format:
https://cran.r-project.org/package=sas7bdat/vignettes/sas7bdat.pdf
Reference for binary data compression:
http://collaboration.cmc.ec.gc.ca/science/rpn/biblio/ddj/Website/articles/CUJ/1992/9210/ross/ross.htm
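
Typical use goes through the top-level ``pandas.read_sas`` entry point
rather than this module directly; the path below is a placeholder:

>>> df = pd.read_sas("data.sas7bdat", encoding="latin-1")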
"""
from datetime import datetime
import struct
import numpy as np
from pandas.errors import EmptyDataError
import pandas as pd
from pandas.io.common import BaseIterator, get_filepath_or_buffer
from pandas.io.sas._sas import Parser
import pandas.io.sas.sas_constants as const
class _subheader_pointer:
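    # Plain attribute holder for a parsed subheader pointer
    # (offset, length, compression, ptype).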
pass
class _column:
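    # Plain attribute holder for column metadata
    # (col_id, name, label, format, ctype, length).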
pass
# SAS7BDATReader reads a SAS data file in SAS7BDAT format.
class SAS7BDATReader(BaseIterator):
"""
Read SAS files in SAS7BDAT format.
Parameters
----------
path_or_buf : path name or buffer
Name of SAS file or file-like object pointing to SAS file
contents.
index : column identifier, defaults to None
Column to use as index.
convert_dates : boolean, defaults to True
Attempt to convert dates to Pandas datetime values. Note that
some rarely used SAS date formats may be unsupported.
blank_missing : boolean, defaults to True
Convert empty strings to missing values (SAS uses blanks to
indicate missing character variables).
    chunksize : int, defaults to None
        If given, iterating over the returned SAS7BDATReader yields
        chunks with the given number of lines.
encoding : string, defaults to None
String encoding.
convert_text : bool, defaults to True
If False, text variables are left as raw bytes.
    convert_header_text : bool, defaults to True
        If False, header text, including column names, is left as raw
        bytes.
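
    Examples
    --------
    A minimal chunked-read sketch; ``test.sas7bdat`` is a placeholder
    path and ``process`` stands in for user code:

    >>> rdr = SAS7BDATReader("test.sas7bdat", chunksize=1000)
    >>> for chunk in rdr:
    ...     process(chunk)
    >>> rdr.close()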
"""
def __init__(
self,
path_or_buf,
index=None,
convert_dates=True,
blank_missing=True,
chunksize=None,
encoding=None,
convert_text=True,
convert_header_text=True,
):
self.index = index
self.convert_dates = convert_dates
self.blank_missing = blank_missing
self.chunksize = chunksize
self.encoding = encoding
self.convert_text = convert_text
self.convert_header_text = convert_header_text
self.default_encoding = "latin-1"
self.compression = ""
self.column_names_strings = []
self.column_names = []
self.column_formats = []
self.columns = []
self._current_page_data_subheader_pointers = []
self._cached_page = None
self._column_data_lengths = []
self._column_data_offsets = []
self._column_types = []
        self._current_row_in_file_index = 0
        self._current_row_on_page_index = 0
self._path_or_buf, _, _, _ = get_filepath_or_buffer(path_or_buf)
if isinstance(self._path_or_buf, str):
self._path_or_buf = open(self._path_or_buf, "rb")
self.handle = self._path_or_buf
self._get_properties()
self._parse_metadata()
def column_data_lengths(self):
"""Return a numpy int64 array of the column data lengths"""
return np.asarray(self._column_data_lengths, dtype=np.int64)
def column_data_offsets(self):
"""Return a numpy int64 array of the column offsets"""
return np.asarray(self._column_data_offsets, dtype=np.int64)
def column_types(self):
"""Returns a numpy character array of the column types:
s (string) or d (double)"""
return np.asarray(self._column_types, dtype=np.dtype("S1"))
def close(self):
try:
self.handle.close()
except AttributeError:
pass
def _get_properties(self):
# Check magic number
self._path_or_buf.seek(0)
self._cached_page = self._path_or_buf.read(288)
if self._cached_page[0 : len(const.magic)] != const.magic:
self.close()
raise ValueError("magic number mismatch (not a SAS file?)")
# Get alignment information
align1, align2 = 0, 0
buf = self._read_bytes(const.align_1_offset, const.align_1_length)
if buf == const.u64_byte_checker_value:
align2 = const.align_2_value
self.U64 = True
self._int_length = 8
self._page_bit_offset = const.page_bit_offset_x64
self._subheader_pointer_length = const.subheader_pointer_length_x64
else:
self.U64 = False
self._page_bit_offset = const.page_bit_offset_x86
self._subheader_pointer_length = const.subheader_pointer_length_x86
self._int_length = 4
buf = self._read_bytes(const.align_2_offset, const.align_2_length)
if buf == const.align_1_checker_value:
align1 = const.align_2_value
total_align = align1 + align2
# Get endianness information
buf = self._read_bytes(const.endianness_offset, const.endianness_length)
if buf == b"\x01":
self.byte_order = "<"
else:
self.byte_order = ">"
# Get encoding information
buf = self._read_bytes(const.encoding_offset, const.encoding_length)[0]
if buf in const.encoding_names:
self.file_encoding = const.encoding_names[buf]
else:
self.file_encoding = "unknown (code={name!s})".format(name=buf)
# Get platform information
buf = self._read_bytes(const.platform_offset, const.platform_length)
if buf == b"1":
self.platform = "unix"
elif buf == b"2":
self.platform = "windows"
else:
self.platform = "unknown"
buf = self._read_bytes(const.dataset_offset, const.dataset_length)
self.name = buf.rstrip(b"\x00 ")
if self.convert_header_text:
self.name = self.name.decode(self.encoding or self.default_encoding)
buf = self._read_bytes(const.file_type_offset, const.file_type_length)
self.file_type = buf.rstrip(b"\x00 ")
if self.convert_header_text:
self.file_type = self.file_type.decode(
self.encoding or self.default_encoding
)
# Timestamp is epoch 01/01/1960
epoch = datetime(1960, 1, 1)
x = self._read_float(
const.date_created_offset + align1, const.date_created_length
)
self.date_created = epoch + pd.to_timedelta(x, unit="s")
x = self._read_float(
const.date_modified_offset + align1, const.date_modified_length
)
self.date_modified = epoch + pd.to_timedelta(x, unit="s")
self.header_length = self._read_int(
const.header_size_offset + align1, const.header_size_length
)
# Read the rest of the header into cached_page.
buf = self._path_or_buf.read(self.header_length - 288)
self._cached_page += buf
if len(self._cached_page) != self.header_length:
self.close()
raise ValueError("The SAS7BDAT file appears to be truncated.")
self._page_length = self._read_int(
const.page_size_offset + align1, const.page_size_length
)
self._page_count = self._read_int(
const.page_count_offset + align1, const.page_count_length
)
buf = self._read_bytes(
const.sas_release_offset + total_align, const.sas_release_length
)
self.sas_release = buf.rstrip(b"\x00 ")
if self.convert_header_text:
self.sas_release = self.sas_release.decode(
self.encoding or self.default_encoding
)
buf = self._read_bytes(
const.sas_server_type_offset + total_align, const.sas_server_type_length
)
self.server_type = buf.rstrip(b"\x00 ")
if self.convert_header_text:
self.server_type = self.server_type.decode(
self.encoding or self.default_encoding
)
buf = self._read_bytes(
const.os_version_number_offset + total_align, const.os_version_number_length
)
self.os_version = buf.rstrip(b"\x00 ")
if self.convert_header_text:
self.os_version = self.os_version.decode(
self.encoding or self.default_encoding
)
buf = self._read_bytes(const.os_name_offset + total_align, const.os_name_length)
buf = buf.rstrip(b"\x00 ")
if len(buf) > 0:
self.os_name = buf.decode(self.encoding or self.default_encoding)
else:
buf = self._read_bytes(
const.os_maker_offset + total_align, const.os_maker_length
)
self.os_name = buf.rstrip(b"\x00 ")
if self.convert_header_text:
self.os_name = self.os_name.decode(
self.encoding or self.default_encoding
)
def __next__(self):
da = self.read(nrows=self.chunksize or 1)
if da is None:
raise StopIteration
return da
# Read a single float of the given width (4 or 8).
def _read_float(self, offset, width):
if width not in (4, 8):
self.close()
raise ValueError("invalid float width")
buf = self._read_bytes(offset, width)
fd = "f" if width == 4 else "d"
return struct.unpack(self.byte_order + fd, buf)[0]
# Read a single signed integer of the given width (1, 2, 4 or 8).
def _read_int(self, offset, width):
if width not in (1, 2, 4, 8):
self.close()
raise ValueError("invalid int width")
buf = self._read_bytes(offset, width)
it = {1: "b", 2: "h", 4: "l", 8: "q"}[width]
iv = struct.unpack(self.byte_order + it, buf)[0]
return iv
def _read_bytes(self, offset, length):
if self._cached_page is None:
self._path_or_buf.seek(offset)
buf = self._path_or_buf.read(length)
if len(buf) < length:
self.close()
msg = "Unable to read {:d} bytes from file position {:d}."
raise ValueError(msg.format(length, offset))
return buf
else:
if offset + length > len(self._cached_page):
self.close()
raise ValueError("The cached page is too small.")
return self._cached_page[offset : offset + length]
def _parse_metadata(self):
done = False
while not done:
self._cached_page = self._path_or_buf.read(self._page_length)
if len(self._cached_page) <= 0:
break
if len(self._cached_page) != self._page_length:
self.close()
raise ValueError("Failed to read a meta data page from the SAS file.")
done = self._process_page_meta()
def _process_page_meta(self):
self._read_page_header()
pt = [const.page_meta_type, const.page_amd_type] + const.page_mix_types
if self._current_page_type in pt:
self._process_page_metadata()
is_data_page = self._current_page_type & const.page_data_type
is_mix_page = self._current_page_type in const.page_mix_types
return (
is_data_page
or is_mix_page
or self._current_page_data_subheader_pointers != []
)
def _read_page_header(self):
bit_offset = self._page_bit_offset
tx = const.page_type_offset + bit_offset
self._current_page_type = self._read_int(tx, const.page_type_length)
tx = const.block_count_offset + bit_offset
self._current_page_block_count = self._read_int(tx, const.block_count_length)
tx = const.subheader_count_offset + bit_offset
self._current_page_subheaders_count = self._read_int(
tx, const.subheader_count_length
)
def _process_page_metadata(self):
bit_offset = self._page_bit_offset
for i in range(self._current_page_subheaders_count):
pointer = self._process_subheader_pointers(
const.subheader_pointers_offset + bit_offset, i
)
if pointer.length == 0:
continue
if pointer.compression == const.truncated_subheader_id:
continue
subheader_signature = self._read_subheader_signature(pointer.offset)
subheader_index = self._get_subheader_index(
subheader_signature, pointer.compression, pointer.ptype
)
self._process_subheader(subheader_index, pointer)
def _get_subheader_index(self, signature, compression, ptype):
index = const.subheader_signature_to_index.get(signature)
if index is None:
f1 = (compression == const.compressed_subheader_id) or (compression == 0)
f2 = ptype == const.compressed_subheader_type
if (self.compression != "") and f1 and f2:
index = const.SASIndex.data_subheader_index
else:
self.close()
raise ValueError("Unknown subheader signature")
return index
def _process_subheader_pointers(self, offset, subheader_pointer_index):
subheader_pointer_length = self._subheader_pointer_length
total_offset = offset + subheader_pointer_length * subheader_pointer_index
subheader_offset = self._read_int(total_offset, self._int_length)
total_offset += self._int_length
subheader_length = self._read_int(total_offset, self._int_length)
total_offset += self._int_length
subheader_compression = self._read_int(total_offset, 1)
total_offset += 1
subheader_type = self._read_int(total_offset, 1)
x = _subheader_pointer()
x.offset = subheader_offset
x.length = subheader_length
x.compression = subheader_compression
x.ptype = subheader_type
return x
def _read_subheader_signature(self, offset):
subheader_signature = self._read_bytes(offset, self._int_length)
return subheader_signature
def _process_subheader(self, subheader_index, pointer):
offset = pointer.offset
length = pointer.length
if subheader_index == const.SASIndex.row_size_index:
processor = self._process_rowsize_subheader
elif subheader_index == const.SASIndex.column_size_index:
processor = self._process_columnsize_subheader
elif subheader_index == const.SASIndex.column_text_index:
processor = self._process_columntext_subheader
elif subheader_index == const.SASIndex.column_name_index:
processor = self._process_columnname_subheader
elif subheader_index == const.SASIndex.column_attributes_index:
processor = self._process_columnattributes_subheader
elif subheader_index == const.SASIndex.format_and_label_index:
processor = self._process_format_subheader
elif subheader_index == const.SASIndex.column_list_index:
processor = self._process_columnlist_subheader
elif subheader_index == const.SASIndex.subheader_counts_index:
processor = self._process_subheader_counts
elif subheader_index == const.SASIndex.data_subheader_index:
self._current_page_data_subheader_pointers.append(pointer)
return
else:
raise ValueError("unknown subheader index")
processor(offset, length)
def _process_rowsize_subheader(self, offset, length):
int_len = self._int_length
lcs_offset = offset
lcp_offset = offset
if self.U64:
lcs_offset += 682
lcp_offset += 706
else:
lcs_offset += 354
lcp_offset += 378
self.row_length = self._read_int(
offset + const.row_length_offset_multiplier * int_len, int_len
)
self.row_count = self._read_int(
offset + const.row_count_offset_multiplier * int_len, int_len
)
self.col_count_p1 = self._read_int(
offset + const.col_count_p1_multiplier * int_len, int_len
)
self.col_count_p2 = self._read_int(
offset + const.col_count_p2_multiplier * int_len, int_len
)
mx = const.row_count_on_mix_page_offset_multiplier * int_len
self._mix_page_row_count = self._read_int(offset + mx, int_len)
self._lcs = self._read_int(lcs_offset, 2)
self._lcp = self._read_int(lcp_offset, 2)
def _process_columnsize_subheader(self, offset, length):
int_len = self._int_length
offset += int_len
self.column_count = self._read_int(offset, int_len)
if self.col_count_p1 + self.col_count_p2 != self.column_count:
print(
"Warning: column count mismatch ({p1} + {p2} != "
"{column_count})\n".format(
p1=self.col_count_p1,
p2=self.col_count_p2,
column_count=self.column_count,
)
)
# Unknown purpose
def _process_subheader_counts(self, offset, length):
pass
def _process_columntext_subheader(self, offset, length):
offset += self._int_length
text_block_size = self._read_int(offset, const.text_block_size_length)
buf = self._read_bytes(offset, text_block_size)
cname_raw = buf[0:text_block_size].rstrip(b"\x00 ")
cname = cname_raw
if self.convert_header_text:
cname = cname.decode(self.encoding or self.default_encoding)
self.column_names_strings.append(cname)
if len(self.column_names_strings) == 1:
compression_literal = ""
for cl in const.compression_literals:
if cl in cname_raw:
compression_literal = cl
self.compression = compression_literal
offset -= self._int_length
offset1 = offset + 16
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcp)
compression_literal = buf.rstrip(b"\x00")
            if compression_literal == b"":
self._lcs = 0
offset1 = offset + 32
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcp)
self.creator_proc = buf[0 : self._lcp]
elif compression_literal == const.rle_compression:
offset1 = offset + 40
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcp)
self.creator_proc = buf[0 : self._lcp]
elif self._lcs > 0:
self._lcp = 0
offset1 = offset + 16
if self.U64:
offset1 += 4
buf = self._read_bytes(offset1, self._lcs)
self.creator_proc = buf[0 : self._lcp]
if self.convert_header_text:
if hasattr(self, "creator_proc"):
self.creator_proc = self.creator_proc.decode(
self.encoding or self.default_encoding
)
def _process_columnname_subheader(self, offset, length):
int_len = self._int_length
offset += int_len
column_name_pointers_count = (length - 2 * int_len - 12) // 8
for i in range(column_name_pointers_count):
text_subheader = (
offset
+ const.column_name_pointer_length * (i + 1)
+ const.column_name_text_subheader_offset
)
col_name_offset = (
offset
+ const.column_name_pointer_length * (i + 1)
+ const.column_name_offset_offset
)
col_name_length = (
offset
+ const.column_name_pointer_length * (i + 1)
+ const.column_name_length_offset
)
idx = self._read_int(
text_subheader, const.column_name_text_subheader_length
)
col_offset = self._read_int(
col_name_offset, const.column_name_offset_length
)
col_len = self._read_int(col_name_length, const.column_name_length_length)
name_str = self.column_names_strings[idx]
self.column_names.append(name_str[col_offset : col_offset + col_len])
def _process_columnattributes_subheader(self, offset, length):
int_len = self._int_length
column_attributes_vectors_count = (length - 2 * int_len - 12) // (int_len + 8)
for i in range(column_attributes_vectors_count):
col_data_offset = (
offset + int_len + const.column_data_offset_offset + i * (int_len + 8)
)
col_data_len = (
offset
+ 2 * int_len
+ const.column_data_length_offset
+ i * (int_len + 8)
)
col_types = (
offset + 2 * int_len + const.column_type_offset + i * (int_len + 8)
)
x = self._read_int(col_data_offset, int_len)
self._column_data_offsets.append(x)
x = self._read_int(col_data_len, const.column_data_length_length)
self._column_data_lengths.append(x)
x = self._read_int(col_types, const.column_type_length)
self._column_types.append(b"d" if x == 1 else b"s")
def _process_columnlist_subheader(self, offset, length):
# unknown purpose
pass
def _process_format_subheader(self, offset, length):
int_len = self._int_length
text_subheader_format = (
offset + const.column_format_text_subheader_index_offset + 3 * int_len
)
col_format_offset = offset + const.column_format_offset_offset + 3 * int_len
col_format_len = offset + const.column_format_length_offset + 3 * int_len
text_subheader_label = (
offset + const.column_label_text_subheader_index_offset + 3 * int_len
)
col_label_offset = offset + const.column_label_offset_offset + 3 * int_len
col_label_len = offset + const.column_label_length_offset + 3 * int_len
x = self._read_int(
text_subheader_format, const.column_format_text_subheader_index_length
)
format_idx = min(x, len(self.column_names_strings) - 1)
format_start = self._read_int(
col_format_offset, const.column_format_offset_length
)
format_len = self._read_int(col_format_len, const.column_format_length_length)
label_idx = self._read_int(
text_subheader_label, const.column_label_text_subheader_index_length
)
label_idx = min(label_idx, len(self.column_names_strings) - 1)
label_start = self._read_int(col_label_offset, const.column_label_offset_length)
label_len = self._read_int(col_label_len, const.column_label_length_length)
label_names = self.column_names_strings[label_idx]
column_label = label_names[label_start : label_start + label_len]
format_names = self.column_names_strings[format_idx]
column_format = format_names[format_start : format_start + format_len]
current_column_number = len(self.columns)
col = _column()
col.col_id = current_column_number
col.name = self.column_names[current_column_number]
col.label = column_label
col.format = column_format
col.ctype = self._column_types[current_column_number]
col.length = self._column_data_lengths[current_column_number]
self.column_formats.append(column_format)
self.columns.append(col)
def read(self, nrows=None):
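        """
        Read ``nrows`` rows and return them as a DataFrame.

        ``nrows`` defaults to ``chunksize`` if one was given, otherwise
        to the full row count; returns None once every row in the file
        has been read.
        """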
if (nrows is None) and (self.chunksize is not None):
nrows = self.chunksize
elif nrows is None:
nrows = self.row_count
if len(self._column_types) == 0:
self.close()
raise EmptyDataError("No columns to parse from file")
if self._current_row_in_file_index >= self.row_count:
return None
m = self.row_count - self._current_row_in_file_index
if nrows > m:
nrows = m
nd = self._column_types.count(b"d")
ns = self._column_types.count(b"s")
        self._string_chunk = np.empty((ns, nrows), dtype=object)
self._byte_chunk = np.zeros((nd, 8 * nrows), dtype=np.uint8)
self._current_row_in_chunk_index = 0
p = Parser(self)
p.read(nrows)
rslt = self._chunk_to_dataframe()
if self.index is not None:
rslt = rslt.set_index(self.index)
return rslt
def _read_next_page(self):
self._current_page_data_subheader_pointers = []
self._cached_page = self._path_or_buf.read(self._page_length)
if len(self._cached_page) <= 0:
return True
elif len(self._cached_page) != self._page_length:
self.close()
            msg = "failed to read complete page from file (read {:d} of {:d} bytes)"
raise ValueError(msg.format(len(self._cached_page), self._page_length))
self._read_page_header()
page_type = self._current_page_type
if page_type == const.page_meta_type:
self._process_page_metadata()
is_data_page = page_type & const.page_data_type
pt = [const.page_meta_type] + const.page_mix_types
if not is_data_page and self._current_page_type not in pt:
return self._read_next_page()
return False
def _chunk_to_dataframe(self):
n = self._current_row_in_chunk_index
m = self._current_row_in_file_index
ix = range(m - n, m)
rslt = pd.DataFrame(index=ix)
js, jb = 0, 0
for j in range(self.column_count):
name = self.column_names[j]
if self._column_types[j] == b"d":
rslt[name] = self._byte_chunk[jb, :].view(dtype=self.byte_order + "d")
rslt[name] = np.asarray(rslt[name], dtype=np.float64)
if self.convert_dates:
unit = None
if self.column_formats[j] in const.sas_date_formats:
unit = "d"
elif self.column_formats[j] in const.sas_datetime_formats:
unit = "s"
if unit:
rslt[name] = pd.to_datetime(
rslt[name], unit=unit, origin="1960-01-01"
)
jb += 1
elif self._column_types[j] == b"s":
rslt[name] = self._string_chunk[js, :]
if self.convert_text and (self.encoding is not None):
rslt[name] = rslt[name].str.decode(
self.encoding or self.default_encoding
)
if self.blank_missing:
ii = rslt[name].str.len() == 0
rslt.loc[ii, name] = np.nan
js += 1
else:
self.close()
raise ValueError(
"unknown column type {type}".format(type=self._column_types[j])
)
return rslt

View File (pandas/io/sas/sas_constants.py)

@@ -0,0 +1,253 @@
magic = (
b"\x00\x00\x00\x00\x00\x00\x00\x00"
+ b"\x00\x00\x00\x00\xc2\xea\x81\x60"
+ b"\xb3\x14\x11\xcf\xbd\x92\x08\x00"
+ b"\x09\xc7\x31\x8c\x18\x1f\x10\x11"
)
align_1_checker_value = b"3"
align_1_offset = 32
align_1_length = 1
align_1_value = 4
u64_byte_checker_value = b"3"
align_2_offset = 35
align_2_length = 1
align_2_value = 4
endianness_offset = 37
endianness_length = 1
platform_offset = 39
platform_length = 1
encoding_offset = 70
encoding_length = 1
dataset_offset = 92
dataset_length = 64
file_type_offset = 156
file_type_length = 8
date_created_offset = 164
date_created_length = 8
date_modified_offset = 172
date_modified_length = 8
header_size_offset = 196
header_size_length = 4
page_size_offset = 200
page_size_length = 4
page_count_offset = 204
page_count_length = 4
sas_release_offset = 216
sas_release_length = 8
sas_server_type_offset = 224
sas_server_type_length = 16
os_version_number_offset = 240
os_version_number_length = 16
os_maker_offset = 256
os_maker_length = 16
os_name_offset = 272
os_name_length = 16
page_bit_offset_x86 = 16
page_bit_offset_x64 = 32
subheader_pointer_length_x86 = 12
subheader_pointer_length_x64 = 24
page_type_offset = 0
page_type_length = 2
block_count_offset = 2
block_count_length = 2
subheader_count_offset = 4
subheader_count_length = 2
page_meta_type = 0
page_data_type = 256
page_amd_type = 1024
page_metc_type = 16384
page_comp_type = -28672
page_mix_types = [512, 640]
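# page_data_type is tested as a bit mask (page_type & page_data_type), so
# data pages are recognized even when other flag bits are set.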
subheader_pointers_offset = 8
truncated_subheader_id = 1
compressed_subheader_id = 4
compressed_subheader_type = 1
text_block_size_length = 2
row_length_offset_multiplier = 5
row_count_offset_multiplier = 6
col_count_p1_multiplier = 9
col_count_p2_multiplier = 10
row_count_on_mix_page_offset_multiplier = 15
column_name_pointer_length = 8
column_name_text_subheader_offset = 0
column_name_text_subheader_length = 2
column_name_offset_offset = 2
column_name_offset_length = 2
column_name_length_offset = 4
column_name_length_length = 2
column_data_offset_offset = 8
column_data_length_offset = 8
column_data_length_length = 4
column_type_offset = 14
column_type_length = 1
column_format_text_subheader_index_offset = 22
column_format_text_subheader_index_length = 2
column_format_offset_offset = 24
column_format_offset_length = 2
column_format_length_offset = 26
column_format_length_length = 2
column_label_text_subheader_index_offset = 28
column_label_text_subheader_index_length = 2
column_label_offset_offset = 30
column_label_offset_length = 2
column_label_length_offset = 32
column_label_length_length = 2
rle_compression = b"SASYZCRL"
rdc_compression = b"SASYZCR2"
compression_literals = [rle_compression, rdc_compression]
# Incomplete list of encodings, using SAS nomenclature:
# http://support.sas.com/documentation/cdl/en/nlsref/61893/HTML/default/viewer.htm#a002607278.htm
encoding_names = {
29: "latin1",
20: "utf-8",
33: "cyrillic",
60: "wlatin2",
61: "wcyrillic",
62: "wlatin1",
90: "ebcdic870",
}
class SASIndex:
row_size_index = 0
column_size_index = 1
subheader_counts_index = 2
column_text_index = 3
column_name_index = 4
column_attributes_index = 5
format_and_label_index = 6
column_list_index = 7
data_subheader_index = 8
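# Each subheader type has several valid signatures because the byte
# pattern varies with the file's endianness and 32/64-bit layout.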
subheader_signature_to_index = {
b"\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
b"\x00\x00\x00\x00\xF7\xF7\xF7\xF7": SASIndex.row_size_index,
b"\xF7\xF7\xF7\xF7\x00\x00\x00\x00": SASIndex.row_size_index,
b"\xF7\xF7\xF7\xF7\xFF\xFF\xFB\xFE": SASIndex.row_size_index,
b"\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
b"\x00\x00\x00\x00\xF6\xF6\xF6\xF6": SASIndex.column_size_index,
b"\xF6\xF6\xF6\xF6\x00\x00\x00\x00": SASIndex.column_size_index,
b"\xF6\xF6\xF6\xF6\xFF\xFF\xFB\xFE": SASIndex.column_size_index,
b"\x00\xFC\xFF\xFF": SASIndex.subheader_counts_index,
b"\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
b"\x00\xFC\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.subheader_counts_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFC\x00": SASIndex.subheader_counts_index,
b"\xFD\xFF\xFF\xFF": SASIndex.column_text_index,
b"\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
b"\xFD\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_text_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFD": SASIndex.column_text_index,
b"\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_name_index,
b"\xFC\xFF\xFF\xFF": SASIndex.column_attributes_index,
b"\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
b"\xFC\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_attributes_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFC": SASIndex.column_attributes_index,
b"\xFE\xFB\xFF\xFF": SASIndex.format_and_label_index,
b"\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
b"\xFE\xFB\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.format_and_label_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFB\xFE": SASIndex.format_and_label_index,
b"\xFE\xFF\xFF\xFF": SASIndex.column_list_index,
b"\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
b"\xFE\xFF\xFF\xFF\xFF\xFF\xFF\xFF": SASIndex.column_list_index,
b"\xFF\xFF\xFF\xFF\xFF\xFF\xFF\xFE": SASIndex.column_list_index,
}
# List of frequently used SAS date and datetime formats
# http://support.sas.com/documentation/cdl/en/etsug/60372/HTML/default/viewer.htm#etsug_intervals_sect009.htm
# https://github.com/epam/parso/blob/master/src/main/java/com/epam/parso/impl/SasFileConstants.java
sas_date_formats = (
"DATE",
"DAY",
"DDMMYY",
"DOWNAME",
"JULDAY",
"JULIAN",
"MMDDYY",
"MMYY",
"MMYYC",
"MMYYD",
"MMYYP",
"MMYYS",
"MMYYN",
"MONNAME",
"MONTH",
"MONYY",
"QTR",
"QTRR",
"NENGO",
"WEEKDATE",
"WEEKDATX",
"WEEKDAY",
"WEEKV",
"WORDDATE",
"WORDDATX",
"YEAR",
"YYMM",
"YYMMC",
"YYMMD",
"YYMMP",
"YYMMS",
"YYMMN",
"YYMON",
"YYMMDD",
"YYQ",
"YYQC",
"YYQD",
"YYQP",
"YYQS",
"YYQN",
"YYQR",
"YYQRC",
"YYQRD",
"YYQRP",
"YYQRS",
"YYQRN",
"YYMMDDP",
"YYMMDDC",
"E8601DA",
"YYMMDDN",
"MMDDYYC",
"MMDDYYS",
"MMDDYYD",
"YYMMDDS",
"B8601DA",
"DDMMYYN",
"YYMMDDD",
"DDMMYYB",
"DDMMYYP",
"MMDDYYP",
"YYMMDDB",
"MMDDYYN",
"DDMMYYC",
"DDMMYYD",
"DDMMYYS",
"MINGUO",
)
sas_datetime_formats = (
"DATETIME",
"DTWKDATX",
"B8601DN",
"B8601DT",
"B8601DX",
"B8601DZ",
"B8601LX",
"E8601DN",
"E8601DT",
"E8601DX",
"E8601DZ",
"E8601LX",
"DATEAMPM",
"DTDATE",
"DTMONYY",
"DTMONYY",
"DTWKDATX",
"DTYEAR",
"TOD",
"MDYAMPM",
)

View File (pandas/io/sas/sas_xport.py)

@@ -0,0 +1,507 @@
"""
Read a SAS XPort format file into a Pandas DataFrame.
Based on code from Jack Cushman (github.com/jcushman/xport).
The file format is defined here:
https://support.sas.com/techsup/technote/ts140.pdf
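
Typical use goes through the top-level ``pandas.read_sas`` entry point;
the path below is a placeholder:

>>> df = pd.read_sas("data.xpt", format="xport")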
"""
from datetime import datetime
from io import BytesIO
import struct
import warnings
import numpy as np
from pandas.util._decorators import Appender
import pandas as pd
from pandas.io.common import BaseIterator, get_filepath_or_buffer
_correct_line1 = (
    "HEADER RECORD*******LIBRARY HEADER RECORD!!!!!!!"
    "000000000000000000000000000000  "
)
_correct_header1 = (
    "HEADER RECORD*******MEMBER  HEADER RECORD!!!!!!!000000000000000001600000000"
)
_correct_header2 = (
    "HEADER RECORD*******DSCRPTR HEADER RECORD!!!!!!!"
    "000000000000000000000000000000  "
)
_correct_obs_header = (
    "HEADER RECORD*******OBS     HEADER RECORD!!!!!!!"
    "000000000000000000000000000000  "
)
_fieldkeys = [
"ntype",
"nhfun",
"field_length",
"nvar0",
"name",
"label",
"nform",
"nfl",
"num_decimals",
"nfj",
"nfill",
"niform",
"nifl",
"nifd",
"npos",
"_",
]
_base_params_doc = """\
Parameters
----------
filepath_or_buffer : string or file-like object
Path to SAS file or object implementing binary read method."""
_params2_doc = """\
index : identifier of index column
Identifier of column that should be used as index of the DataFrame.
encoding : string
Encoding for text data.
chunksize : int
Read file `chunksize` lines at a time, returns iterator."""
_format_params_doc = """\
format : string
File format, only `xport` is currently supported."""
_iterator_doc = """\
iterator : boolean, default False
Return XportReader object for reading file incrementally."""
_read_sas_doc = """Read a SAS file into a DataFrame.
%(_base_params_doc)s
%(_format_params_doc)s
%(_params2_doc)s
%(_iterator_doc)s
Returns
-------
DataFrame or XportReader
Examples
--------
Read a SAS Xport file:
>>> df = pd.read_sas('filename.XPT')
Read a Xport file in 10,000 line chunks:
>>> itr = pd.read_sas('filename.XPT', chunksize=10000)
>>> for chunk in itr:
...     do_something(chunk)
""" % {
"_base_params_doc": _base_params_doc,
"_format_params_doc": _format_params_doc,
"_params2_doc": _params2_doc,
"_iterator_doc": _iterator_doc,
}
_xport_reader_doc = """\
Class for reading SAS Xport files.
%(_base_params_doc)s
%(_params2_doc)s
Attributes
----------
member_info : list
Contains information about the file
fields : list
Contains information about the variables in the file
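
Examples
--------
A sketch of incremental reading; the path is a placeholder:

>>> rdr = XportReader("data.xpt", chunksize=10000)
>>> df = rdr.read()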
""" % {
"_base_params_doc": _base_params_doc,
"_params2_doc": _params2_doc,
}
_read_method_doc = """\
Read observations from SAS Xport file, returning as data frame.
Parameters
----------
nrows : int
Number of rows to read from data file; if None, read whole
file.
Returns
-------
A DataFrame.
"""
def _parse_date(datestr):
""" Given a date in xport format, return Python date. """
try:
# e.g. "16FEB11:10:07:55"
return datetime.strptime(datestr, "%d%b%y:%H:%M:%S")
except ValueError:
return pd.NaT
def _split_line(s, parts):
"""
Parameters
----------
s: string
Fixed-length string to split
parts: list of (name, length) pairs
Used to break up string, name '_' will be filtered from output.
Returns
-------
Dict of name:contents of string at given location.
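
    Example
    -------
    >>> _split_line("AB12", [("x", 2), ("y", 2), ("_", 0)])
    {'x': 'AB', 'y': '12'}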
"""
out = {}
start = 0
for name, length in parts:
out[name] = s[start : start + length].strip()
start += length
del out["_"]
return out
def _handle_truncated_float_vec(vec, nbytes):
# This feature is not well documented, but some SAS XPORT files
# have 2-7 byte "truncated" floats. To read these truncated
# floats, pad them with zeros on the right to make 8 byte floats.
#
# References:
# https://github.com/jcushman/xport/pull/3
# The R "foreign" library
if nbytes != 8:
vec1 = np.zeros(len(vec), np.dtype("S8"))
dtype = np.dtype("S%d,S%d" % (nbytes, 8 - nbytes))
vec2 = vec1.view(dtype=dtype)
vec2["f0"] = vec
return vec2
return vec
def _parse_float_vec(vec):
"""
Parse a vector of float values representing IBM 8 byte floats into
native 8 byte floats.
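
    For instance, the IBM hex-float bytes ``41 10 00 00 00 00 00 00``
    encode 1.0:

    >>> _parse_float_vec(np.frombuffer(b"\x41\x10" + b"\x00" * 6, dtype="S8"))
    array([1.])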
"""
dtype = np.dtype(">u4,>u4")
vec1 = vec.view(dtype=dtype)
xport1 = vec1["f0"]
xport2 = vec1["f1"]
# Start by setting first half of ieee number to first half of IBM
# number sans exponent
ieee1 = xport1 & 0x00FFFFFF
# The fraction bit to the left of the binary point in the ieee
# format was set and the number was shifted 0, 1, 2, or 3
# places. This will tell us how to adjust the ibm exponent to be a
# power of 2 ieee exponent and how to shift the fraction bits to
# restore the correct magnitude.
shift = np.zeros(len(vec), dtype=np.uint8)
shift[np.where(xport1 & 0x00200000)] = 1
shift[np.where(xport1 & 0x00400000)] = 2
shift[np.where(xport1 & 0x00800000)] = 3
# shift the ieee number down the correct number of places then
# set the second half of the ieee number to be the second half
# of the ibm number shifted appropriately, ored with the bits
# from the first half that would have been shifted in if we
# could shift a double. All we are worried about are the low
# order 3 bits of the first half since we're only shifting by
# 1, 2, or 3.
ieee1 >>= shift
ieee2 = (xport2 >> shift) | ((xport1 & 0x00000007) << (29 + (3 - shift)))
# clear the 1 bit to the left of the binary point
ieee1 &= 0xFFEFFFFF
# set the exponent of the ieee number to be the actual exponent
# plus the shift count + 1023. Or this into the first half of the
# ieee number. The ibm exponent is excess 64 but is adjusted by 65
# since during conversion to ibm format the exponent is
# incremented by 1 and the fraction bits left 4 positions to the
# right of the radix point. (had to add >> 24 because C treats &
# 0x7f as 0x7f000000 and Python doesn't)
ieee1 |= ((((((xport1 >> 24) & 0x7F) - 65) << 2) + shift + 1023) << 20) | (
xport1 & 0x80000000
)
ieee = np.empty((len(ieee1),), dtype=">u4,>u4")
ieee["f0"] = ieee1
ieee["f1"] = ieee2
ieee = ieee.view(dtype=">f8")
ieee = ieee.astype("f8")
return ieee
class XportReader(BaseIterator):
__doc__ = _xport_reader_doc
def __init__(
self, filepath_or_buffer, index=None, encoding="ISO-8859-1", chunksize=None
):
self._encoding = encoding
self._lines_read = 0
self._index = index
self._chunksize = chunksize
if isinstance(filepath_or_buffer, str):
(
filepath_or_buffer,
encoding,
compression,
should_close,
) = get_filepath_or_buffer(filepath_or_buffer, encoding=encoding)
if isinstance(filepath_or_buffer, (str, bytes)):
self.filepath_or_buffer = open(filepath_or_buffer, "rb")
else:
# Copy to BytesIO, and ensure no encoding
contents = filepath_or_buffer.read()
try:
contents = contents.encode(self._encoding)
except UnicodeEncodeError:
pass
self.filepath_or_buffer = BytesIO(contents)
self._read_header()
def close(self):
self.filepath_or_buffer.close()
def _get_row(self):
return self.filepath_or_buffer.read(80).decode()
def _read_header(self):
self.filepath_or_buffer.seek(0)
# read file header
line1 = self._get_row()
if line1 != _correct_line1:
self.close()
raise ValueError("Header record is not an XPORT file.")
line2 = self._get_row()
fif = [["prefix", 24], ["version", 8], ["OS", 8], ["_", 24], ["created", 16]]
file_info = _split_line(line2, fif)
if file_info["prefix"] != "SAS SAS SASLIB":
self.close()
raise ValueError("Header record has invalid prefix.")
file_info["created"] = _parse_date(file_info["created"])
self.file_info = file_info
line3 = self._get_row()
file_info["modified"] = _parse_date(line3[:16])
# read member header
header1 = self._get_row()
header2 = self._get_row()
headflag1 = header1.startswith(_correct_header1)
headflag2 = header2 == _correct_header2
if not (headflag1 and headflag2):
self.close()
raise ValueError("Member header not found")
# usually 140, could be 135
fieldnamelength = int(header1[-5:-2])
# member info
mem = [
["prefix", 8],
["set_name", 8],
["sasdata", 8],
["version", 8],
["OS", 8],
["_", 24],
["created", 16],
]
member_info = _split_line(self._get_row(), mem)
mem = [["modified", 16], ["_", 16], ["label", 40], ["type", 8]]
member_info.update(_split_line(self._get_row(), mem))
member_info["modified"] = _parse_date(member_info["modified"])
member_info["created"] = _parse_date(member_info["created"])
self.member_info = member_info
# read field names
types = {1: "numeric", 2: "char"}
fieldcount = int(self._get_row()[54:58])
datalength = fieldnamelength * fieldcount
# round up to nearest 80
if datalength % 80:
datalength += 80 - datalength % 80
fielddata = self.filepath_or_buffer.read(datalength)
fields = []
obs_length = 0
while len(fielddata) >= fieldnamelength:
# pull data for one field
field, fielddata = (
fielddata[:fieldnamelength],
fielddata[fieldnamelength:],
)
# rest at end gets ignored, so if field is short, pad out
# to match struct pattern below
field = field.ljust(140)
fieldstruct = struct.unpack(">hhhh8s40s8shhh2s8shhl52s", field)
field = dict(zip(_fieldkeys, fieldstruct))
del field["_"]
field["ntype"] = types[field["ntype"]]
fl = field["field_length"]
if field["ntype"] == "numeric" and ((fl < 2) or (fl > 8)):
self.close()
msg = "Floating field width {0} is not between 2 and 8."
raise TypeError(msg.format(fl))
for k, v in field.items():
try:
field[k] = v.strip()
except AttributeError:
pass
obs_length += field["field_length"]
fields += [field]
header = self._get_row()
if not header == _correct_obs_header:
self.close()
raise ValueError("Observation header not found.")
self.fields = fields
self.record_length = obs_length
self.record_start = self.filepath_or_buffer.tell()
self.nobs = self._record_count()
self.columns = [x["name"].decode() for x in self.fields]
# Setup the dtype.
dtypel = [
("s" + str(i), "S" + str(field["field_length"]))
for i, field in enumerate(self.fields)
]
dtype = np.dtype(dtypel)
self._dtype = dtype
def __next__(self):
return self.read(nrows=self._chunksize or 1)
def _record_count(self):
"""
Get number of records in file.
This is maybe suboptimal because we have to seek to the end of
the file.
Side effect: returns file position to record_start.
"""
self.filepath_or_buffer.seek(0, 2)
total_records_length = self.filepath_or_buffer.tell() - self.record_start
if total_records_length % 80 != 0:
warnings.warn("xport file may be corrupted")
if self.record_length > 80:
self.filepath_or_buffer.seek(self.record_start)
return total_records_length // self.record_length
self.filepath_or_buffer.seek(-80, 2)
last_card = self.filepath_or_buffer.read(80)
last_card = np.frombuffer(last_card, dtype=np.uint64)
# 8 byte blank
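        # (2314885530818453536 == 0x2020202020202020, i.e. eight ASCII spaces)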
ix = np.flatnonzero(last_card == 2314885530818453536)
if len(ix) == 0:
tail_pad = 0
else:
tail_pad = 8 * len(ix)
self.filepath_or_buffer.seek(self.record_start)
return (total_records_length - tail_pad) // self.record_length
def get_chunk(self, size=None):
"""
Reads lines from Xport file and returns as dataframe
Parameters
----------
size : int, defaults to None
Number of lines to read. If None, reads whole file.
Returns
-------
DataFrame
"""
if size is None:
size = self._chunksize
return self.read(nrows=size)
def _missing_double(self, vec):
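        # A SAS XPORT missing value is one marker byte (b"." = 0x2E, b"_" =
        # 0x5F, or b"A" through b"Z") followed by seven zero bytes; the view
        # below exposes the marker byte (f0) and the remaining seven bytes
        # (f1, f2, f3) so they can be tested separately.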
v = vec.view(dtype="u1,u1,u2,u4")
miss = (v["f1"] == 0) & (v["f2"] == 0) & (v["f3"] == 0)
miss1 = (
((v["f0"] >= 0x41) & (v["f0"] <= 0x5A))
| (v["f0"] == 0x5F)
| (v["f0"] == 0x2E)
)
miss &= miss1
return miss
@Appender(_read_method_doc)
def read(self, nrows=None):
if nrows is None:
nrows = self.nobs
read_lines = min(nrows, self.nobs - self._lines_read)
read_len = read_lines * self.record_length
if read_len <= 0:
self.close()
raise StopIteration
raw = self.filepath_or_buffer.read(read_len)
data = np.frombuffer(raw, dtype=self._dtype, count=read_lines)
df = pd.DataFrame(index=range(read_lines))
for j, x in enumerate(self.columns):
vec = data["s%d" % j]
ntype = self.fields[j]["ntype"]
if ntype == "numeric":
vec = _handle_truncated_float_vec(vec, self.fields[j]["field_length"])
miss = self._missing_double(vec)
v = _parse_float_vec(vec)
v[miss] = np.nan
elif self.fields[j]["ntype"] == "char":
v = [y.rstrip() for y in vec]
if self._encoding is not None:
v = [y.decode(self._encoding) for y in v]
df[x] = v
if self._index is None:
df.index = range(self._lines_read, self._lines_read + read_lines)
else:
df = df.set_index(self._index)
self._lines_read += read_lines
return df

View File (pandas/io/sas/sasreader.py)

@@ -0,0 +1,86 @@
"""
Read SAS sas7bdat or xport files.
"""
from pandas.io.common import _stringify_path
def read_sas(
filepath_or_buffer,
format=None,
index=None,
encoding=None,
chunksize=None,
iterator=False,
):
"""
Read SAS files stored as either XPORT or SAS7BDAT format files.
Parameters
----------
filepath_or_buffer : str, path object or file-like object
Any valid string path is acceptable. The string could be a URL. Valid
URL schemes include http, ftp, s3, and file. For file URLs, a host is
expected. A local file could be:
``file://localhost/path/to/table.sas``.
If you want to pass in a path object, pandas accepts any
``os.PathLike``.
By file-like object, we refer to objects with a ``read()`` method,
such as a file handler (e.g. via builtin ``open`` function)
or ``StringIO``.
format : string {'xport', 'sas7bdat'} or None
If None, file format is inferred from file extension. If 'xport' or
'sas7bdat', uses the corresponding format.
index : identifier of index column, defaults to None
Identifier of column that should be used as index of the DataFrame.
encoding : string, default is None
Encoding for text data. If None, text data are stored as raw bytes.
chunksize : int
Read file `chunksize` lines at a time, returns iterator.
iterator : bool, defaults to False
If True, returns an iterator for reading the file incrementally.
Returns
-------
DataFrame if iterator=False and chunksize=None, else SAS7BDATReader
or XportReader
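
    Examples
    --------
    >>> df = pd.read_sas("sas_data.sas7bdat")  # doctest: +SKIP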
"""
if format is None:
buffer_error_msg = (
"If this is a buffer object rather "
"than a string name, you must specify "
"a format string"
)
filepath_or_buffer = _stringify_path(filepath_or_buffer)
if not isinstance(filepath_or_buffer, str):
raise ValueError(buffer_error_msg)
fname = filepath_or_buffer.lower()
if fname.endswith(".xpt"):
format = "xport"
elif fname.endswith(".sas7bdat"):
format = "sas7bdat"
else:
raise ValueError("unable to infer format of SAS file")
if format.lower() == "xport":
from pandas.io.sas.sas_xport import XportReader
reader = XportReader(
filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
)
elif format.lower() == "sas7bdat":
from pandas.io.sas.sas7bdat import SAS7BDATReader
reader = SAS7BDATReader(
filepath_or_buffer, index=index, encoding=encoding, chunksize=chunksize
)
else:
raise ValueError("unknown SAS format")
if iterator or chunksize:
return reader
data = reader.read()
reader.close()
return data