sinxadmin 发表于 2013-1-30 01:17:20

Data

Example 6-1. Exploring common OS module data methods

In : import os
In : os.getcwd()
Out: '/private/tmp'
In : os.mkdir("/tmp/os_mod_explore")
In : os.listdir("/tmp/os_mod_explore")
Out: []
In : os.mkdir("/tmp/os_mod_explore/test_dir1")
In : os.listdir("/tmp/os_mod_explore")
Out: ['test_dir1']
In : os.stat("/tmp/os_mod_explore")
Out: (16877, 6029306L, 234881026L, 3, 501, 0, 102L,1207014425, 1207014398, 1207014398)
In : os.rename("/tmp/os_mod_explore/test_dir1","/tmp/os_mod_explore/test_dir1_renamed")
In : os.listdir("/tmp/os_mod_explore")
Out: ['test_dir1_renamed']
In : os.rmdir("/tmp/os_mod_explore/test_dir1_renamed")
In : os.rmdir("/tmp/os_mod_explore/")

Example 6-2. Using the shutil module to copy a data tree

In : import os
In : os.chdir("/tmp")
In : os.makedirs("test/test_subdir1/test_subdir2")
In : ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/

./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/

./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/

./test/test_subdir1/test_subdir2:

In : import shutil
In : shutil.copytree("test", "test-copy")
In : ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test-copy/

./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/

./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/

./test/test_subdir1/test_subdir2:

./test-copy:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/

./test-copy/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/

./test-copy/test_subdir1/test_subdir2:

Example 6-3.
Moving a data tree with shutilIn : shutil.move("test-copy", "test-copy-moved")In : ls -lRtotal 0drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test-copy-moved/./test:total 0drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/./test/test_subdir1:total 0drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/./test/test_subdir1/test_subdir2:./test-copy-moved:total 0drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/./test-copy-moved/test_subdir1:total 0drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/./test-copy-moved/test_subdir1/test_subdir2:Example 6-4. Deleting a data tree with shutilIn : shutil.rmtree("test-copy-moved")In : shutil.rmtree("test-copy")In : ll    Example 6-5. Verbose directory walking scriptimport ospath = "/tmp"def enumeratepaths(path=path):      """Returns the path to all the files in a directory recursively"""      path_collection = []      for dirpath, dirnames, filenames in os.walk(path):            for file in filenames:                fullpath = os.path.join(dirpath, file)            path_collection.append(fullpath)      return path_collectiondef enumeratefiles(path=path):      """Returns all the files in a directory as a list"""      file_collection = []      for dirpath, dirnames, filenames in os.walk(path):            for file in filenames:                file_collection.append(file)      return file_collectiondef enumeratedir(path=path):      """Returns all the directories in a directory as a list"""      dir_collection = []      for dirpath, dirnames, filenames in os.walk(path):            for dir in dirnames:                dir_collection.append(dir)      return dir_collectionif __name__ == "__main__":            print "\nRecursive listing of all paths in a dir:"            for path in enumeratepaths():                print path                print "\nRecursive listing of all files in dir:"            for file in enumeratefiles():                print file                print 
"\nRecursive listing of all dirs in dir:"            for dir in enumeratedir():                print dirExample 6-6. Creating reusable directory walking moduleimport osclass diskwalk(object):"""API for getting directory walking collections"""def __init__(self, path):    self.path = pathdef enumeratePaths(self):"""Returns the path to all the files in a directory as a list"""    path_collection = []    for dirpath, dirnames, filenames in os.walk(self.path):      for file in filenames:            fullpath = os.path.join(dirpath, file)            path_collection.append(fullpath)    return path_collectiondef enumerateFiles(self):"""Returns all the files in a directory as a list"""    file_collection = []    for dirpath, dirnames, filenames in os.walk(self.path):      for file in filenames:            file_collection.append(file)    return file_collectiondef enumerateDir(self):"""Returns all the directories in a directory as a list"""    dir_collection = []    for dirpath, dirnames, filenames in os.walk(self.path):      for dir in dirnames:            dir_collection.append(dir)    return dir_collectionExample 6-7. Performing an MD5 checksum on filesimport hashlibdef create_checksum(path):"""Reads in file. Creates checksum of file line by line.Returns complete checksum total for file."""    fp = open(path)    checksum = hashlib.md5()    while True:      buffer = fp.read(8192)      if not buffer:break      checksum.update(buffer)    fp.close()    checksum = checksum.digest()    return checksumExample 6-8. 
Performing an MD5 checksum on a directory tree to find duplicatesIn : from checksum import createChecksumIn : from diskwalk_api import diskwalkIn : d = diskwalk('/tmp/duplicates_directory')In : files = d.enumeratePaths()In : len(files)Out: 12In : dup = []In : record = {}In : for file in files:compound_key = (getsize(file),create_checksum(file))if compound_key in record:dup.append(file)else:record = file....:....:In : print dup['/tmp/duplicates_directory/image2']Example 6-9. Finding duplicatesfrom checksum import create_checksumfrom diskwalk_api import diskwalkfrom os.path import getsizedef findDupes(path = '/tmp'):    dup = []    record = {}    d = diskwalk(path)    files = d.enumeratePaths()    for file in files:      compound_key = (getsize(file),create_checksum(file))      if compound_key in record:            dup.append(file)      else:            #print "Creating compound key record:", compound_key            record = file    return dupif __name__ == "__main__":    dupes = findDupes()    for dup in dupes:    print “Duplicate: %s” % dupExample 6-10. 
Delete module#!/usr/bin/env pythonimport osclass Delete(object):    """Delete Methods For File Objects"""    def __init__(self, file):      self.file = file    def interactive(self):    """interactive deletion mode"""    input = raw_input("Do you really want to delete %s /Y" % self.file)    if input.upper():      print "DELETING: %s" % self.file      status = os.remove(self.file)    else:      print "Skipping: %s" % self.file    return    def dryrun(self):      """simulation mode for deletion"""      print "Dry Run: %s " % self.file      return    def delete(self):      """Performs a delete on a file, with additional conditions      """      print "DELETING: %s" % self.file      try:            status = os.remove(self.file)      except Exception, err:            print err            return statusif __name__ == "__main__":    from find_dupes import findDupes    dupes = findDupes('/tmp')    for dupe in dupes:      delete = Delete(dupe)#delete.dryrun()#delete.delete()#delete.interactive()Example 6-11. Interactively using fnmatch and glob to search for file matchesIn : from diskwalk_api import diskwalkIn : files = diskwalk("/tmp")In : from fnmatch import fnmatchIn : for file in files:...: if fnmatch(file,"*.txt"):...: print file...:...:/tmp/file.txtIn : from glob import globIn : import osIn : os.chdir("/tmp")In : glob("*")Out: ['file.txt', 'image.iso', 'music.mp3']Example 6-12. 
Renaming a tree full of MP3 files to text filesIn : from diskwalk_api import diskwalkIn : from shutil import moveIn : from fnmatch import fnmatchIn : files = diskwalk("/tmp")In : for file in files:if fnmatch(file, "*.mp3"):#here we can do anything we want, delete, move, rename...hmmm renamemove(file, "%s.txt" % file)In : ls -l /tmp/total 0-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 file.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 image.iso-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 music.mp3.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music1.mp3.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music2.mp3.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music3.mp3.txtExample 6-13. Simple wrap of rsync#!/usr/bin/env python#wraps up rsync to synchronize two directoriesfrom subprocess import callimport syssource = "/tmp/sync_dir_A/" #Note the trailing slashtarget = "/tmp/sync_dir_B"rsync = "rsync"arguments = "-a"cmd = "%s %s %s %s" % (rsync, arguments, source, target)def sync():    ret = call(cmd, shell=True)    if ret !=0:    print "rsync failed"    sys.exit(1)sync()Example 6-14. An rsync command that doesn’t quit until the job is finished#!/usr/bin/env python#wraps up rsync to synchronize two directoriesfrom subprocess import callimport sysimport time"""this motivated rsync tries to synchronize forever"""source = "/tmp/sync_dir_A/" #Note the trailing slashtarget = "/tmp/sync_dir_B"rsync = "rsync"arguments = "-av"cmd = "%s %s %s %s" % (rsync, arguments, source, target)def sync():while True:    ret = call(cmd, shell=True)    if ret !=0:      print "resubmitting rsync"      time.sleep(30)    else:      print "rsync was succesful"      subprocess.call("mail -s 'jobs done' bofh@example.com", shell=True)      sys.exit(0)sync()Example 6-15. 
Creating metadata about a filesystem with SQLAlchemy#!/usr/bin/env pythonfrom sqlalchemy import create_enginefrom sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKeyfrom sqlalchemy.orm import mapper, sessionmakerimport os#pathpath = " /tmp"#Part 1: create engineengine = create_engine('sqlite:///:memory:', echo=False)#Part 2: metadatametadata = MetaData()filesystem_table = Table('filesystem', metadata,    Column('id', Integer, primary_key=True),    Column('path', String(500)),    Column('file', String(255)),)metadata.create_all(engine)#Part 3: mapped classclass Filesystem(object):    def __init__(self, path, file):      self.path = path      self.file = file    def __repr__(self):      return "" % (self.path, self.file)    #Part 4: mapper functionmapper(Filesystem,filesystem_table)#Part 5: create sessionSession = sessionmaker(bind=engine, autoflush=True, transactional=True)session = Session()#Part 6: crawl file system and populate database with resultsfor dirpath, dirnames, filenames in os.walk(path):    for file in filenames:      fullpath = os.path.join(dirpath, file)      record = Filesystem(fullpath, file)      session.save(record)#Part 7: commit to the databasesession.commit()#Part 8: queryfor record in session.query(Filesystem):    print "Database Record Number: %s, Path: %s , File: %s " \      % (record.id,record.path, record.file)Example 6-16. Create big text fileIn : f = open("largeFile.txt", "w")In : statement = "This is a big line that I intend to write over and over again."ln : x = 0In : for x in xrange(20000):....: x += 1....: f.write("%s\n" % statement)....:....:In : ls -l-rw-r--r-- 1 root root 1236992 Oct 25 23:13 largeFile.txtExample 6-17. TAR up contents of fileIn : import tarfileIn : tar = tarfile.open("largefile.tar", "w")In : tar.add("largeFile.txt")In : tar.close()Example 6-18. 
TAR up contents of a directory tree

In : import tarfile
In : tar = tarfile.open("temp.tar", "w")
In : import os
In : for root, dir, files in os.walk("/tmp"):
....:     for file in filenames:
....:
KeyboardInterrupt
In : for root, dir, files in os.walk("/tmp"):
    for file in files:
....:         fullpath = os.path.join(root,file)
....:         tar.add(fullpath)
....:
....:
In : tar.close()

Example 6-19. Creating bzip2 TAR archive

In : tar = tarfile.open("largefilecompressed.tar.bzip2", "w|bz2")
In : tar.add("largeFile.txt")
In : ls -h
foo1.txt fooDir1/ largeFile.txt largefilecompressed.tar.bzip2*
foo2.txt fooDir2/ largefile.tar
In : tar.close()
In : ls -lh
-rw-r--r-- 1 root root 61M Oct 25 23:15 largeFile.txt
-rw-r--r-- 1 root root 61M Oct 26 00:39 largefile.tar
-rwxr-xr-x 1 root root 10K Oct 26 01:02 largefilecompressed.tar.bzip2*

Example 6-20. Creating a gzip TAR archive

In : tar = tarfile.open("largefile.tar.gzip", "w|gz")
In : tar.add("largeFile.txt")
In : tar.close()
In : ls -lh
-rw-r--r-- 1 root root 61M Oct 26 01:20 largeFile.txt
-rw-r--r-- 1 root root 61M Oct 26 00:39 largefile.tar
-rwxr-xr-x 1 root root 160K Oct 26 01:24 largefile.tar.gzip*
页: [1]
查看完整版本: Data