Data
Example 6-1. Exploring common OS module data methods
In : import os
In : os.getcwd()
Out: '/private/tmp'
In : os.mkdir("/tmp/os_mod_explore")
In : os.listdir("/tmp/os_mod_explore")
Out: []
In : os.mkdir("/tmp/os_mod_explore/test_dir1")
In : os.listdir("/tmp/os_mod_explore")
Out: ['test_dir1']
In : os.stat("/tmp/os_mod_explore")
Out: (16877, 6029306L, 234881026L, 3, 501, 0, 102L, 1207014425, 1207014398, 1207014398)
In : os.rename("/tmp/os_mod_explore/test_dir1", "/tmp/os_mod_explore/test_dir1_renamed")
In : os.listdir("/tmp/os_mod_explore")
Out: ['test_dir1_renamed']
In : os.rmdir("/tmp/os_mod_explore/test_dir1_renamed")
In : os.rmdir("/tmp/os_mod_explore/")

Example 6-2. Using the shutil module to copy a data tree
In : import os
In : os.chdir("/tmp")
In : os.makedirs("test/test_subdir1/test_subdir2")
In : ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/

./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/

./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/

./test/test_subdir1/test_subdir2:
In : import shutil
In : shutil.copytree("test", "test-copy")
In : ls -lR
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test-copy/

./test:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/

./test/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/

./test/test_subdir1/test_subdir2:

./test-copy:
total 0
drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/

./test-copy/test_subdir1:
total 0
drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/

./test-copy/test_subdir1/test_subdir2:

Example 6-3.
Moving a data tree with shutilIn : shutil.move("test-copy", "test-copy-moved")In : ls -lRtotal 0drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test/drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test-copy-moved/./test:total 0drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/./test/test_subdir1:total 0drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/./test/test_subdir1/test_subdir2:./test-copy-moved:total 0drwxr-xr-x 3 ngift wheel 102 Mar 31 22:27 test_subdir1/./test-copy-moved/test_subdir1:total 0drwxr-xr-x 2 ngift wheel 68 Mar 31 22:27 test_subdir2/./test-copy-moved/test_subdir1/test_subdir2:Example 6-4. Deleting a data tree with shutilIn : shutil.rmtree("test-copy-moved")In : shutil.rmtree("test-copy")In : ll Example 6-5. Verbose directory walking scriptimport ospath = "/tmp"def enumeratepaths(path=path): """Returns the path to all the files in a directory recursively""" path_collection = [] for dirpath, dirnames, filenames in os.walk(path): for file in filenames: fullpath = os.path.join(dirpath, file) path_collection.append(fullpath) return path_collectiondef enumeratefiles(path=path): """Returns all the files in a directory as a list""" file_collection = [] for dirpath, dirnames, filenames in os.walk(path): for file in filenames: file_collection.append(file) return file_collectiondef enumeratedir(path=path): """Returns all the directories in a directory as a list""" dir_collection = [] for dirpath, dirnames, filenames in os.walk(path): for dir in dirnames: dir_collection.append(dir) return dir_collectionif __name__ == "__main__": print "\nRecursive listing of all paths in a dir:" for path in enumeratepaths(): print path print "\nRecursive listing of all files in dir:" for file in enumeratefiles(): print file print "\nRecursive listing of all dirs in dir:" for dir in enumeratedir(): print dirExample 6-6. 
Creating reusable directory walking moduleimport osclass diskwalk(object):"""API for getting directory walking collections"""def __init__(self, path): self.path = pathdef enumeratePaths(self):"""Returns the path to all the files in a directory as a list""" path_collection = [] for dirpath, dirnames, filenames in os.walk(self.path): for file in filenames: fullpath = os.path.join(dirpath, file) path_collection.append(fullpath) return path_collectiondef enumerateFiles(self):"""Returns all the files in a directory as a list""" file_collection = [] for dirpath, dirnames, filenames in os.walk(self.path): for file in filenames: file_collection.append(file) return file_collectiondef enumerateDir(self):"""Returns all the directories in a directory as a list""" dir_collection = [] for dirpath, dirnames, filenames in os.walk(self.path): for dir in dirnames: dir_collection.append(dir) return dir_collectionExample 6-7. Performing an MD5 checksum on filesimport hashlibdef create_checksum(path):"""Reads in file. Creates checksum of file line by line.Returns complete checksum total for file.""" fp = open(path) checksum = hashlib.md5() while True: buffer = fp.read(8192) if not buffer:break checksum.update(buffer) fp.close() checksum = checksum.digest() return checksumExample 6-8. Performing an MD5 checksum on a directory tree to find duplicatesIn : from checksum import createChecksumIn : from diskwalk_api import diskwalkIn : d = diskwalk('/tmp/duplicates_directory')In : files = d.enumeratePaths()In : len(files)Out: 12In : dup = []In : record = {}In : for file in files:compound_key = (getsize(file),create_checksum(file))if compound_key in record:dup.append(file)else:record = file....:....:In : print dup['/tmp/duplicates_directory/image2']Example 6-9. 
Finding duplicatesfrom checksum import create_checksumfrom diskwalk_api import diskwalkfrom os.path import getsizedef findDupes(path = '/tmp'): dup = [] record = {} d = diskwalk(path) files = d.enumeratePaths() for file in files: compound_key = (getsize(file),create_checksum(file)) if compound_key in record: dup.append(file) else: #print "Creating compound key record:", compound_key record = file return dupif __name__ == "__main__": dupes = findDupes() for dup in dupes: print “Duplicate: %s” % dupExample 6-10. Delete module#!/usr/bin/env pythonimport osclass Delete(object): """Delete Methods For File Objects""" def __init__(self, file): self.file = file def interactive(self): """interactive deletion mode""" input = raw_input("Do you really want to delete %s /Y" % self.file) if input.upper(): print "DELETING: %s" % self.file status = os.remove(self.file) else: print "Skipping: %s" % self.file return def dryrun(self): """simulation mode for deletion""" print "Dry Run: %s " % self.file return def delete(self): """Performs a delete on a file, with additional conditions """ print "DELETING: %s" % self.file try: status = os.remove(self.file) except Exception, err: print err return statusif __name__ == "__main__": from find_dupes import findDupes dupes = findDupes('/tmp') for dupe in dupes: delete = Delete(dupe)#delete.dryrun()#delete.delete()#delete.interactive()Example 6-11. Interactively using fnmatch and glob to search for file matchesIn : from diskwalk_api import diskwalkIn : files = diskwalk("/tmp")In : from fnmatch import fnmatchIn : for file in files:...: if fnmatch(file,"*.txt"):...: print file...:...:/tmp/file.txtIn : from glob import globIn : import osIn : os.chdir("/tmp")In : glob("*")Out: ['file.txt', 'image.iso', 'music.mp3']Example 6-12. 
Renaming a tree full of MP3 files to text filesIn : from diskwalk_api import diskwalkIn : from shutil import moveIn : from fnmatch import fnmatchIn : files = diskwalk("/tmp")In : for file in files:if fnmatch(file, "*.mp3"):#here we can do anything we want, delete, move, rename...hmmm renamemove(file, "%s.txt" % file)In : ls -l /tmp/total 0-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 file.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 image.iso-rw-r--r-- 1 ngift wheel 0 Apr 1 21:50 music.mp3.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music1.mp3.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music2.mp3.txt-rw-r--r-- 1 ngift wheel 0 Apr 1 22:45 music3.mp3.txtExample 6-13. Simple wrap of rsync#!/usr/bin/env python#wraps up rsync to synchronize two directoriesfrom subprocess import callimport syssource = "/tmp/sync_dir_A/" #Note the trailing slashtarget = "/tmp/sync_dir_B"rsync = "rsync"arguments = "-a"cmd = "%s %s %s %s" % (rsync, arguments, source, target)def sync(): ret = call(cmd, shell=True) if ret !=0: print "rsync failed" sys.exit(1)sync()Example 6-14. An rsync command that doesn’t quit until the job is finished#!/usr/bin/env python#wraps up rsync to synchronize two directoriesfrom subprocess import callimport sysimport time"""this motivated rsync tries to synchronize forever"""source = "/tmp/sync_dir_A/" #Note the trailing slashtarget = "/tmp/sync_dir_B"rsync = "rsync"arguments = "-av"cmd = "%s %s %s %s" % (rsync, arguments, source, target)def sync():while True: ret = call(cmd, shell=True) if ret !=0: print "resubmitting rsync" time.sleep(30) else: print "rsync was succesful" subprocess.call("mail -s 'jobs done' bofh@example.com", shell=True) sys.exit(0)sync()Example 6-15. 
Creating metadata about a filesystem with SQLAlchemy#!/usr/bin/env pythonfrom sqlalchemy import create_enginefrom sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKeyfrom sqlalchemy.orm import mapper, sessionmakerimport os#pathpath = " /tmp"#Part 1: create engineengine = create_engine('sqlite:///:memory:', echo=False)#Part 2: metadatametadata = MetaData()filesystem_table = Table('filesystem', metadata, Column('id', Integer, primary_key=True), Column('path', String(500)), Column('file', String(255)),)metadata.create_all(engine)#Part 3: mapped classclass Filesystem(object): def __init__(self, path, file): self.path = path self.file = file def __repr__(self): return "" % (self.path, self.file) #Part 4: mapper functionmapper(Filesystem,filesystem_table)#Part 5: create sessionSession = sessionmaker(bind=engine, autoflush=True, transactional=True)session = Session()#Part 6: crawl file system and populate database with resultsfor dirpath, dirnames, filenames in os.walk(path): for file in filenames: fullpath = os.path.join(dirpath, file) record = Filesystem(fullpath, file) session.save(record)#Part 7: commit to the databasesession.commit()#Part 8: queryfor record in session.query(Filesystem): print "Database Record Number: %s, Path: %s , File: %s " \ % (record.id,record.path, record.file)Example 6-16. Create big text fileIn : f = open("largeFile.txt", "w")In : statement = "This is a big line that I intend to write over and over again."ln : x = 0In : for x in xrange(20000):....: x += 1....: f.write("%s\n" % statement)....:....:In : ls -l-rw-r--r-- 1 root root 1236992 Oct 25 23:13 largeFile.txtExample 6-17. TAR up contents of fileIn : import tarfileIn : tar = tarfile.open("largefile.tar", "w")In : tar.add("largeFile.txt")In : tar.close()Example 6-18. 
TAR up contents of a directory tree
In : import tarfile
In : tar = tarfile.open("temp.tar", "w")
In : import os
In : for root, dir, files in os.walk("/tmp"):
....:     for file in filenames:
....:
KeyboardInterrupt
In : for root, dir, files in os.walk("/tmp"):
    for file in files:
....:         fullpath = os.path.join(root,file)
....:         tar.add(fullpath)
....:
....:
In : tar.close()

Example 6-19. Creating bzip2 TAR archive
In : tar = tarfile.open("largefilecompressed.tar.bzip2", "w|bz2")
In : tar.add("largeFile.txt")
In : ls -h
foo1.txt fooDir1/ largeFile.txt largefilecompressed.tar.bzip2*
foo2.txt fooDir2/ largefile.tar
In : tar.close()
In : ls -lh
-rw-r--r-- 1 root root 61M Oct 25 23:15 largeFile.txt
-rw-r--r-- 1 root root 61M Oct 26 00:39 largefile.tar
-rwxr-xr-x 1 root root 10K Oct 26 01:02 largefilecompressed.tar.bzip2*

Example 6-20. Creating a gzip TAR archive
In : tar = tarfile.open("largefile.tar.gzip", "w|gz")
In : tar.add("largeFile.txt")
In : tar.close()
In : ls -lh
-rw-r--r-- 1 root root 61M Oct 26 01:20 largeFile.txt
-rw-r--r-- 1 root root 61M Oct 26 00:39 largefile.tar
-rwxr-xr-x 1 root root 160K Oct 26 01:24 largefile.tar.gzip*
页:
[1]