osdir.com


[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

7z archive reader akin to zipfile?


On Wednesday, January 3, 2018 at 1:11:31 PM UTC-5, Skip Montanaro wrote:
> The zipfile module is kind of cool because you can access elements of
> the archive without explicitly uncompressing the entire archive and
> writing the structure to disk. I've got some 7z archives I'd like to
> treat the same way (read specific elements without first extracting
> the entire tree to disk). I see the pylzma module for compressing and
> uncompressing files, but nothing slightly higher level. Does something
> like that exist?
> 
> Thx,
> 
> Skip

I made this wrapper class for 7zip. It might be useful for you.

#python wrapper for 7zip
import os
import zlib
from subprocess import Popen, PIPE
"""
p = Popen(['program', 'arg1'], stdin=PIPE, stdout=PIPE, stderr=PIPE)
output, err = p.communicate(b"input data that is passed to subprocess' stdin")
rc = p.returncode
"""

# Default path of the 7-Zip command-line executable (a Windows-style install
# location). Used as the default ZipProgram argument of SevenZip below.
SEVEN_ZIP_PATH = "C:/Program Files/7-Zip/7z.exe" 

class SevenZip(object):
    """Thin wrapper around the 7-Zip command-line program.

    Every operation builds a subprocess-style argument list and runs it
    through call(). The most recently used archive, output directory and
    file list are remembered on the instance, so later calls may omit them.

    Attributes:
    ZipProgram -- path of the 7-Zip executable
    archive -- last archive path used (None until one is supplied)
    outputDir -- last extraction directory used (None until one is supplied)
    fileList -- last list of files passed to add/delete/update
    archiveType -- value for the -t switch used by modify(); None omits it
    """

    def __new__(cls, ZipProgram=SEVEN_ZIP_PATH):
        """Refuse to create an instance when the 7-Zip executable is missing.

        Raises ValueError if ZipProgram is not an existing file.
        """
        if not os.path.isfile(ZipProgram):
            raise ValueError("7zip program not found in %s" % (ZipProgram))
        return super(SevenZip, cls).__new__(cls)

    def __init__(self, ZipProgram=SEVEN_ZIP_PATH):
        """Store the executable path and reset the remembered state."""
        self.ZipProgram = ZipProgram
        self.archive = None
        self.outputDir = None
        self.fileList = []
        self.archiveType = "zip"

    @staticmethod
    def _text(data):
        """Return subprocess output in a printable form.

        On Python 3 the pipes yield bytes, which would print as b'...';
        decode them leniently. On Python 2 bytes is str, so the data is
        returned unchanged.
        """
        if isinstance(data, bytes) and not isinstance(data, str):
            return data.decode("utf-8", "replace")
        return data

    def _resolve_archive(self, archive, must_exist=True):
        """Remember archive when given and return the archive to operate on.

        Returns None when no archive has ever been supplied, in which case
        the calling method does nothing.
        Raises ValueError when must_exist is true and the archive is not an
        existing file.
        """
        if archive is not None:
            self.archive = archive
        if self.archive is None:
            return None
        if must_exist and not os.path.isfile(self.archive):
            raise ValueError("Archive not found in %s" % (self.archive))
        return self.archive

    def call(self, cmdList=None):
        """ Used by the other methods to call the 7zip command line.
        Can be used directly to run 7zip if the wrapper methods don't suffice.

        cmdList -- Subprocess style list of command line options with
        the first item in the list being self.ZipProgram

        The captured output, the return code and any error text are printed.
        Returns a tuple (returncode, stdout, stderr) with the raw data read
        from the process, or None when cmdList is None.
        """
        if cmdList is None:
            return None
        zip7 = Popen(cmdList, stdin=PIPE, stdout=PIPE, stderr=PIPE)
        output, err = zip7.communicate()
        rc = zip7.returncode
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("output %s" % self._text(output))
        print("return code %s" % rc)
        if err:
            print("errors found %s" % self._text(err))
        return rc, output, err

    def modify(self, archive=None, fileList=None, cmd=None):
        """ Modify an archive (add, delete or update)
        [optional]
        archive -- the zip file
        fileList -- a list/tuple of file paths or a single filepath
        cmd -- 'a': add, 'd': delete or 'u': update

        Returns the result of call(), or None when no archive is set.
        Raises ValueError for an unknown cmd, or when the archive does not
        exist and cmd is not 'a' (adding may create a new archive).
        """
        if cmd not in ('a', 'u', 'd'):
            raise ValueError("Invalid command %s" % (cmd,))
        if fileList is not None:
            if isinstance(fileList, (list, tuple)):
                self.fileList = list(fileList)
            else:
                self.fileList = [fileList]

        target = self._resolve_archive(archive, must_exist=(cmd != 'a'))
        if target is None:
            return None
        cmdList = [self.ZipProgram, cmd, '-y']
        if self.archiveType is not None:
            cmdList.append("-t" + self.archiveType)
        cmdList.append(target)
        cmdList.extend(self.fileList)
        print(cmdList)
        return self.call(cmdList)

    def usage(self):
        """ Run 7zip without arguments so that it prints its usage text.
        These options can be accessed directly with call.

        Returns the result of call(); the usage text is in its stdout item.

        7-Zip [64] 9.20  Copyright (c) 1999-2010 Igor Pavlov  2010-11-18

            Usage: 7z <command> [<switches>...] <archive_name> [<file_names>...]
                   [<@listfiles...>]

            <Commands>
              a: Add files to archive
              b: Benchmark
              d: Delete files from archive
              e: Extract files from archive (without using directory names)
              l: List contents of archive
              t: Test integrity of archive
              u: Update files to archive
              x: eXtract files with full paths
            <Switches>
              -ai[r[-|0]]{@listfile|!wildcard}: Include archives
              -ax[r[-|0]]{@listfile|!wildcard}: eXclude archives
              -bd: Disable percentage indicator
              -i[r[-|0]]{@listfile|!wildcard}: Include filenames
              -m{Parameters}: set compression Method
              -o{Directory}: set Output directory
              -p{Password}: set Password
              -r[-|0]: Recurse subdirectories
              -scs{UTF-8 | WIN | DOS}: set charset for list files
              -sfx[{name}]: Create SFX archive
              -si[{name}]: read data from stdin
              -slt: show technical information for l (List) command
              -so: write data to stdout
              -ssc[-]: set sensitive case mode
              -ssw: compress shared files
              -t{Type}: Set type of archive
              -u[-][p#][q#][r#][x#][y#][z#][!newArchiveName]: Update options
              -v{Size}[b|k|m|g]: Create volumes
              -w[{path}]: assign Work directory. Empty path means a temporary directory
              -x[r[-|0]]]{@listfile|!wildcard}: eXclude filenames
              -y: assume Yes on all queries
        """
        return self.call([self.ZipProgram])

    def add(self, archive=None, fileList=None):
        """ Add a file or list of files to an archive
        [optional]
        archive -- the zip file
        fileList -- a list of file paths or a single filepath

        Returns whatever modify() returns.
        """
        return self.modify(archive, fileList, 'a')

    def benchmark(self, archive):
        """ Placeholder: only prints that benchmarking is not implemented. """
        print("not implemented")

    def delete(self, archive=None, fileList=None):
        """ Delete a file or list of files from an archive
        [optional]
        archive -- the zip file
        fileList -- a list of file paths or a single filepath

        Returns whatever modify() returns.
        """
        return self.modify(archive, fileList, 'd')

    def list(self, archive=None):
        """ List the contents of an archive
        [optional]
        archive -- the zip file

        Returns the result of call(), or None when no archive is set.
        Raises ValueError when the archive file does not exist.
        """
        target = self._resolve_archive(archive)
        if target is None:
            return None
        return self.call([self.ZipProgram, "l", target])

    def test(self, archive=None):
        """ Test an archive for errors
        [optional]
        archive -- the zip file

        Returns the result of call(), or None when no archive is set.
        Raises ValueError when the archive file does not exist.
        """
        target = self._resolve_archive(archive)
        if target is None:
            return None
        return self.call([self.ZipProgram, "t", target])

    def update(self, archive=None, fileList=None):
        """ Update a file or list of files to an archive
        only if the file does not exist or is newer than the existing file
        [optional]
        archive -- the zip file
        fileList -- a list of file paths or a single filepath

        Returns whatever modify() returns.
        """
        return self.modify(archive, fileList, 'u')

    def extract(self, archive=None, fullpath=True, outputDir=None):
        """ Extract the contents of an archive
        [optional]
        archive -- the zip file
        fullpath -- extract with fullpaths ('x'); otherwise flat ('e')
        outputDir -- specify the output directory

        Returns the result of call(), or None when no archive is set.
        Raises ValueError when the archive file does not exist.
        """
        cmdList = [self.ZipProgram, 'x' if fullpath else 'e', '-y']
        # The output directory is remembered even if the archive check
        # below fails, as in the original implementation.
        if outputDir is not None:
            self.outputDir = outputDir
        if self.outputDir is not None:
            cmdList.append('-o' + self.outputDir)

        target = self._resolve_archive(archive)
        if target is None:
            return None
        cmdList.append(target)
        print(cmdList)
        return self.call(cmdList)

    def crc(self, archive=None):
        """ Return the checksum of the archive
        archive -- the zip file

        The CRC-32 of the whole archive file is returned as an uppercase
        hexadecimal string without leading zeros. Returns None when no
        archive is set. Raises ValueError when the archive file does not
        exist.
        """
        target = self._resolve_archive(archive)
        if target is None:
            return None
        checksum = 0
        # Read fixed-size chunks: binary data has no meaningful "lines",
        # and the with-block closes the file even if reading fails.
        with open(target, "rb") as handle:
            for chunk in iter(lambda: handle.read(65536), b""):
                checksum = zlib.crc32(chunk, checksum)
        return "%X" % (checksum & 0xFFFFFFFF)