Category Archives: Python

Python defines a full vector class

first edition:

# BEGIN VECTOR_V1
from array import array
import reprlib
import math


class Vector:
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)  # <1>

    def __iter__(self):
        return iter(self._components)  # <2>

    def __repr__(self):
        components = reprlib.repr(self._components)  # <3>
        components = components[components.find('['):-1]  # <4>
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        return (bytes([ord(self.typecode)]) +
                bytes(self._components))  # <5>

    def __eq__(self, other):
        return tuple(self) == tuple(other)

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))  # <6>

    def __bool__(self):
        return bool(abs(self))

    @classmethod
    def frombytes(cls, octets):
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)  # <7>
# END VECTOR_V1

second edition: slicable sequence

from array import array
import reprlib
import math
import numbers


class Vector:
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __repr__(self):
        components = reprlib.repr(self._components)
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        return (bytes([ord(self.typecode)]) +
                bytes(self._components))

    def __eq__(self, other):
        return tuple(self) == tuple(other)

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        return bool(abs(self))

# BEGIN VECTOR_V2
    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        cls = type(self)  # <1>
        if isinstance(index, slice):  # <2>
            return cls(self._components[index])  # <3>
        elif isinstance(index, numbers.Integral):  # <4>
            return self._components[index]  # <5>
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))  # <6>
# END VECTOR_V2

    @classmethod
    def frombytes(cls, octets):
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

The biggest change in

V2 is the implementation of ___ and ___, thereby enabling slicing, both of which are also necessary for vector to behave as a sequence:

if [1:4] is used, a slice is returned. Slice is a built-in type. A review of the slice reveals that it has the start,stop, step data attributes, and the indices methods. In indices, given a sequence of len, indexes at the beginning and the end of an extension section marked with S, as well as the stride length, are computed. Indexes exceeding the boundary are truncated.

Vector version 3: dynamic access property

from array import array
import reprlib
import math
import numbers


class Vector:
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __repr__(self):
        components = reprlib.repr(self._components)
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        return (bytes([ord(self.typecode)]) +
                bytes(self._components))

    def __eq__(self, other):
        return tuple(self) == tuple(other)

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        return bool(abs(self))

    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        cls = type(self)
        if isinstance(index, slice):
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{.__name__} indices must be integers'
            raise TypeError(msg.format(cls))

# BEGIN VECTOR_V3_GETATTR
    shortcut_names = 'xyzt'

    def __getattr__(self, name):
        cls = type(self)  # <1>
        if len(name) == 1:  # <2>
            pos = cls.shortcut_names.find(name)  # <3>
            if 0 <= pos < len(self._components):  # <4>
                return self._components[pos]
        msg = '{.__name__!r} object has no attribute {!r}'  # <5>
        raise AttributeError(msg.format(cls, name))
# END VECTOR_V3_GETATTR

# BEGIN VECTOR_V3_SETATTR
    def __setattr__(self, name, value):
        cls = type(self)
        if len(name) == 1:  # <1>
            if name in cls.shortcut_names:  # <2>
                error = 'readonly attribute {attr_name!r}'
            elif name.islower():  # <3>
                error = "can't set attributes 'a' to 'z' in {cls_name!r}"
            else:
                error = ''  # <4>
            if error:  # <5>
                msg = error.format(cls_name=cls.__name__, attr_name=name)
                raise AttributeError(msg)
        super().__setattr__(name, value)  # <6>

# END VECTOR_V3_SETATTR

    @classmethod
    def frombytes(cls, octets):
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

, ___ getattr__ and ___ setattr__, the former is to get vector components and the latter is to protect the existing components. V.x cannot be directly assigned because x has become an attribute of v. The value of v.x has changed, but the value of v has not.

Vector iv: hashing and fast equivalence testing

from array import array
import reprlib
import math
import numbers
import functools
import operator


class Vector:
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __repr__(self):
        components = reprlib.repr(self._components)
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        return (bytes([ord(self.typecode)]) +
                bytes(self._components))

    def __eq__(self, other):
        return (len(self) == len(other) and
                all(a == b for a, b in zip(self, other)))

    def __hash__(self):
        hashes = (hash(x) for x in self)
        return functools.reduce(operator.xor, hashes, 0)

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        return bool(abs(self))

    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        cls = type(self)
        if isinstance(index, slice):
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{cls.__name__} indices must be integers'
            raise TypeError(msg.format(cls=cls))

    shortcut_names = 'xyzt'

    def __getattr__(self, name):
        cls = type(self)
        if len(name) == 1:
            pos = cls.shortcut_names.find(name)
            if 0 <= pos < len(self._components):
                return self._components[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    @classmethod
    def frombytes(cls, octets):
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)

fifth edition: format

# BEGIN VECTOR_V5
"""
A multi-dimensional ``Vector`` class, take 5

A ``Vector`` is built from an iterable of numbers::

    >>> Vector([3.1, 4.2])
    Vector([3.1, 4.2])
    >>> Vector((3, 4, 5))
    Vector([3.0, 4.0, 5.0])
    >>> Vector(range(10))
    Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])


Tests with 2-dimensions (same results as ``vector2d_v1.py``)::

    >>> v1 = Vector([3, 4])
    >>> x, y = v1
    >>> x, y
    (3.0, 4.0)
    >>> v1
    Vector([3.0, 4.0])
    >>> v1_clone = eval(repr(v1))
    >>> v1 == v1_clone
    True
    >>> print(v1)
    (3.0, 4.0)
    >>> octets = bytes(v1)
    >>> octets
    b'd\\x00\\x00\\x00\\x00\\x00\\x00\\x08@\\x00\\x00\\x00\\x00\\x00\\x00\\x10@'
    >>> abs(v1)
    5.0
    >>> bool(v1), bool(Vector([0, 0]))
    (True, False)


Test of ``.frombytes()`` class method:

    >>> v1_clone = Vector.frombytes(bytes(v1))
    >>> v1_clone
    Vector([3.0, 4.0])
    >>> v1 == v1_clone
    True


Tests with 3-dimensions::

    >>> v1 = Vector([3, 4, 5])
    >>> x, y, z = v1
    >>> x, y, z
    (3.0, 4.0, 5.0)
    >>> v1
    Vector([3.0, 4.0, 5.0])
    >>> v1_clone = eval(repr(v1))
    >>> v1 == v1_clone
    True
    >>> print(v1)
    (3.0, 4.0, 5.0)
    >>> abs(v1)  # doctest:+ELLIPSIS
    7.071067811...
    >>> bool(v1), bool(Vector([0, 0, 0]))
    (True, False)


Tests with many dimensions::

    >>> v7 = Vector(range(7))
    >>> v7
    Vector([0.0, 1.0, 2.0, 3.0, 4.0, ...])
    >>> abs(v7)  # doctest:+ELLIPSIS
    9.53939201...


Test of ``.__bytes__`` and ``.frombytes()`` methods::

    >>> v1 = Vector([3, 4, 5])
    >>> v1_clone = Vector.frombytes(bytes(v1))
    >>> v1_clone
    Vector([3.0, 4.0, 5.0])
    >>> v1 == v1_clone
    True


Tests of sequence behavior::

    >>> v1 = Vector([3, 4, 5])
    >>> len(v1)
    3
    >>> v1[0], v1[len(v1)-1], v1[-1]
    (3.0, 5.0, 5.0)


Test of slicing::

    >>> v7 = Vector(range(7))
    >>> v7[-1]
    6.0
    >>> v7[1:4]
    Vector([1.0, 2.0, 3.0])
    >>> v7[-1:]
    Vector([6.0])
    >>> v7[1,2]
    Traceback (most recent call last):
      ...
    TypeError: Vector indices must be integers


Tests of dynamic attribute access::

    >>> v7 = Vector(range(10))
    >>> v7.x
    0.0
    >>> v7.y, v7.z, v7.t
    (1.0, 2.0, 3.0)

Dynamic attribute lookup failures::

    >>> v7.k
    Traceback (most recent call last):
      ...
    AttributeError: 'Vector' object has no attribute 'k'
    >>> v3 = Vector(range(3))
    >>> v3.t
    Traceback (most recent call last):
      ...
    AttributeError: 'Vector' object has no attribute 't'
    >>> v3.spam
    Traceback (most recent call last):
      ...
    AttributeError: 'Vector' object has no attribute 'spam'


Tests of hashing::

    >>> v1 = Vector([3, 4])
    >>> v2 = Vector([3.1, 4.2])
    >>> v3 = Vector([3, 4, 5])
    >>> v6 = Vector(range(6))
    >>> hash(v1), hash(v3), hash(v6)
    (7, 2, 1)


Most hash values of non-integers vary from a 32-bit to 64-bit CPython build::

    >>> import sys
    >>> hash(v2) == (384307168202284039 if sys.maxsize > 2**32 else 357915986)
    True


Tests of ``format()`` with Cartesian coordinates in 2D::

    >>> v1 = Vector([3, 4])
    >>> format(v1)
    '(3.0, 4.0)'
    >>> format(v1, '.2f')
    '(3.00, 4.00)'
    >>> format(v1, '.3e')
    '(3.000e+00, 4.000e+00)'


Tests of ``format()`` with Cartesian coordinates in 3D and 7D::

    >>> v3 = Vector([3, 4, 5])
    >>> format(v3)
    '(3.0, 4.0, 5.0)'
    >>> format(Vector(range(7)))
    '(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0)'


Tests of ``format()`` with spherical coordinates in 2D, 3D and 4D::

    >>> format(Vector([1, 1]), 'h')  # doctest:+ELLIPSIS
    '<1.414213..., 0.785398...>'
    >>> format(Vector([1, 1]), '.3eh')
    '<1.414e+00, 7.854e-01>'
    >>> format(Vector([1, 1]), '0.5fh')
    '<1.41421, 0.78540>'
    >>> format(Vector([1, 1, 1]), 'h')  # doctest:+ELLIPSIS
    '<1.73205..., 0.95531..., 0.78539...>'
    >>> format(Vector([2, 2, 2]), '.3eh')
    '<3.464e+00, 9.553e-01, 7.854e-01>'
    >>> format(Vector([0, 0, 0]), '0.5fh')
    '<0.00000, 0.00000, 0.00000>'
    >>> format(Vector([-1, -1, -1, -1]), 'h')  # doctest:+ELLIPSIS
    '<2.0, 2.09439..., 2.18627..., 3.92699...>'
    >>> format(Vector([2, 2, 2, 2]), '.3eh')
    '<4.000e+00, 1.047e+00, 9.553e-01, 7.854e-01>'
    >>> format(Vector([0, 1, 0, 0]), '0.5fh')
    '<1.00000, 1.57080, 0.00000, 0.00000>'
"""

from array import array
import reprlib
import math
import numbers
import functools
import operator
import itertools  # <1>


class Vector:
    typecode = 'd'

    def __init__(self, components):
        self._components = array(self.typecode, components)

    def __iter__(self):
        return iter(self._components)

    def __repr__(self):
        components = reprlib.repr(self._components)
        components = components[components.find('['):-1]
        return 'Vector({})'.format(components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        return (bytes([ord(self.typecode)]) +
                bytes(self._components))

    def __eq__(self, other):
        return (len(self) == len(other) and
                all(a == b for a, b in zip(self, other)))

    def __hash__(self):
        hashes = (hash(x) for x in self)
        return functools.reduce(operator.xor, hashes, 0)

    def __abs__(self):
        return math.sqrt(sum(x * x for x in self))

    def __bool__(self):
        return bool(abs(self))

    def __len__(self):
        return len(self._components)

    def __getitem__(self, index):
        cls = type(self)
        if isinstance(index, slice):
            return cls(self._components[index])
        elif isinstance(index, numbers.Integral):
            return self._components[index]
        else:
            msg = '{.__name__} indices must be integers'
            raise TypeError(msg.format(cls))

    shortcut_names = 'xyzt'

    def __getattr__(self, name):
        cls = type(self)
        if len(name) == 1:
            pos = cls.shortcut_names.find(name)
            if 0 <= pos < len(self._components):
                return self._components[pos]
        msg = '{.__name__!r} object has no attribute {!r}'
        raise AttributeError(msg.format(cls, name))

    def angle(self, n):  # <2>
        r = math.sqrt(sum(x * x for x in self[n:]))
        a = math.atan2(r, self[n-1])
        if (n == len(self) - 1) and (self[-1] < 0):
            return math.pi * 2 - a
        else:
            return a

    def angles(self):  # <3>
        return (self.angle(n) for n in range(1, len(self)))

    def __format__(self, fmt_spec=''):
        if fmt_spec.endswith('h'):  # hyperspherical coordinates
            fmt_spec = fmt_spec[:-1]
            coords = itertools.chain([abs(self)],
                                     self.angles())  # <4>
            outer_fmt = '<{}>'  # <5>
        else:
            coords = self
            outer_fmt = '({})'  # <6>
        components = (format(c, fmt_spec) for c in coords)  # <7>
        return outer_fmt.format(', '.join(components))  # <8>

    @classmethod
    def frombytes(cls, octets):
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(memv)
# END VECTOR_V5

Python parses XML files (parses, updates, writes)

Overview

This blog post will include parsing the XML file, appending new elements to write to the XML, and updating the value of a node in the original XML file. The python xml.dom.minidom package is used, and the details can be seen in its official document: xml.dom.minidom official document. The full text will operate around the following customer.xml :

<?xml version="1.0" encoding="utf-8" ?>
<!-- This is list of customers -->
<customers>
  <customer ID="C001">
    <name>Acme Inc.</name>
    <phone>12345</phone>
    <comments>
      <![CDATA[Regular customer since 1995]]>
    </comments>
  </customer>
  <customer ID="C002">
    <name>Star Wars Inc.</name>
    <phone>23456</phone>
    <comments>
      <![CDATA[A small but healthy company.]]>
    </comments>
  </customer>
</customers>

CDATA: part of the data in XML that is not parsed by the parser.

declaration: in this article, nodes and nodes are considered to be the same concept, you can replace them anywhere in the whole text, I personally feel the difference is not very big, of course, you can also view it as my typing error.

1. Parse XML file

when parsing XML, all text is stored in a text node, and the text nodes are regarded as nodes child elements, such as: 2005, element nodes, has a text node value is “2005”, “2005” is not the value of the element, the most commonly used method is the getElementsByTagName () method, and then further access to the nodes according to the document structure parsing.

specific theory is not enough to describe, with the above XML file and the following code, you will clearly see the operation method, the following code is to perform all node names and node information output as follows:

# -*- coding: utf-8 -*-
"""
    @Author  : LiuZhian
    @Time    : 2019/4/24 0024 上午 9:19
    @Comment : 
"""
from xml.dom.minidom import parse
def readXML():
	domTree = parse("./customer.xml")
	# 文档根元素
	rootNode = domTree.documentElement
	print(rootNode.nodeName)

	# 所有顾客
	customers = rootNode.getElementsByTagName("customer")
	print("****所有顾客信息****")
	for customer in customers:
		if customer.hasAttribute("ID"):
			print("ID:", customer.getAttribute("ID"))
			# name 元素
			name = customer.getElementsByTagName("name")[0]
			print(name.nodeName, ":", name.childNodes[0].data)
			# phone 元素
			phone = customer.getElementsByTagName("phone")[0]
			print(phone.nodeName, ":", phone.childNodes[0].data)
			# comments 元素
			comments = customer.getElementsByTagName("comments")[0]
			print(comments.nodeName, ":", comments.childNodes[0].data)

if __name__ == '__main__':
	readXML()

2. Write to XML file When writing

, I think there are two ways:

Create a new XML file

and append some element information to the existing XML file

in both cases, the method for creating element nodes is similar, all you have to do is create/get a DOM object, and then create a new node based on the DOM.

in the first case, you can create it by dom= minidom.document (); In the second case, you can get the dom object directly by parsing the existing XML file, for example dom = parse("./customer.xml")

when creating element/text nodes, you’ll probably write a four-step sequence like this:

create a new element node createElement()

create a text node createTextNode()

mount the text node on the element node

mount the element node on its parent.

now, I need to create a new customer node with the following information :

<customer ID="C003">
    <name>kavin</name>
    <phone>32467</phone>
    <comments>
      <![CDATA[A small but healthy company.]]>
    </comments>
  </customer>

code as follows:

def writeXML():
	domTree = parse("./customer.xml")
	# 文档根元素
	rootNode = domTree.documentElement

	# 新建一个customer节点
	customer_node = domTree.createElement("customer")
	customer_node.setAttribute("ID", "C003")

	# 创建name节点,并设置textValue
	name_node = domTree.createElement("name")
	name_text_value = domTree.createTextNode("kavin")
	name_node.appendChild(name_text_value)  # 把文本节点挂到name_node节点
	customer_node.appendChild(name_node)

	# 创建phone节点,并设置textValue
	phone_node = domTree.createElement("phone")
	phone_text_value = domTree.createTextNode("32467")
	phone_node.appendChild(phone_text_value)  # 把文本节点挂到name_node节点
	customer_node.appendChild(phone_node)

	# 创建comments节点,这里是CDATA
	comments_node = domTree.createElement("comments")
	cdata_text_value = domTree.createCDATASection("A small but healthy company.")
	comments_node.appendChild(cdata_text_value)
	customer_node.appendChild(comments_node)

	rootNode.appendChild(customer_node)

	with open('added_customer.xml', 'w') as f:
		# 缩进 - 换行 - 编码
		domTree.writexml(f, addindent='  ', encoding='utf-8')

if __name__ == '__main__':
	writeXML()

3. Update XML file

when updating XML, we only need to find the corresponding element node first, and then update the value of the text node or attribute under it, and then save it to the file. I will not say more about the details, but I have made the idea clear in the code, as follows:

def updateXML():
	domTree = parse("./customer.xml")
	# 文档根元素
	rootNode = domTree.documentElement

	names = rootNode.getElementsByTagName("name")
	for name in names:
		if name.childNodes[0].data == "Acme Inc.":
			# 获取到name节点的父节点
			pn = name.parentNode
			# 父节点的phone节点，其实也就是name的兄弟节点
			# 可能有sibNode方法，我没试过，大家可以google一下
			phone = pn.getElementsByTagName("phone")[0]
			# 更新phone的取值
			phone.childNodes[0].data = 99999

	with open('updated_customer.xml', 'w') as f:
		# 缩进 - 换行 - 编码
		domTree.writexml(f, addindent='  ', encoding='utf-8')

if __name__ == '__main__':
	updateXML()

if there is anything wrong, please advise ~

The Python DOM method iterates over all the XML in a folder

I just started learning Python recently. To implement this same function, iterate through an XML file in the res\\value directory of an Android app code. Because its XML file format is basically the following, relatively simple.

string.xml

<?xml version="1.0" encoding="utf-8"?>
<resources>

    <string name="app_name">ActivityLife</string>
    <string name="hello_world">Hello world!</string>
    <string name="action_settings">Settings</string>

</resources>

As you can see, there are actually three String children under the resources parent node. Because I’m new to Python, I find it easier to use the XML.dom method (forgive me for being a whitehead).

The

XML dom defines the objects and attributes of XML elements and the methods to access them. Dom treats XML documents as a tree structure. To see this, click the open link. Ok, let’s get back to business. To solve the above problem, the specific idea is

1. Traverse the folder to get all the XML files. (glob.glob())

2. Read and parse each XML. Gets the child node attribute value and the text node value.

below is the implementation code:

# -*- coding: UTF-8 -*-
#遍历某个文件夹下所有xml文件
import sys  
import glob  
import os
import  xml.dom.minidom  
def traversalDir_XMLFile(path):
    #判断路径是否存在
 if (os.path.exists(path)):
     #得到该文件夹路径下下的所有xml文件路径
    f = glob.glob(path + '\\*.xml' )      
    
    for file in f : 
        print file
        #打开xml文档
        dom = xml.dom.minidom.parse(file)
        #得到文档元素对象
        root = dom.documentElement
        #得到子节点列表，print childs                
        childs = root.childNodes
        for child in childs:
            #筛选符合需求的child                    
            if(child.nodeType == 1):
                #得出子节点属性和文本节点值           
                print'key:', child.getAttribute('name')
                print'value:',child.firstChild.data
            
traversalDir_XMLFile('E:\\work\\ActivityLife\\res\\values')

The path path is one of my value folders with dimens.xml; string.xml; styles.xml; There are also several Word files and TXT format files. The output result is:

E:\work\ActivityLife\res\values\dimens.xml
key: activity_horizontal_margin
value: 16dp
key: activity_vertical_margin
value: 16dp
E:\work\ActivityLife\res\values\strings.xml
key: app_name
value: ActivityLife
key: hello_world
value: Hello world!
key: action_settings
value: Settings
E:\work\ActivityLife\res\values\styles.xml
key: AppBaseTheme
value: 
        
key: AppTheme
value:

The comments in the code are pretty clear. Because I have other XML files in my file, although the parent nodes are all under Resources, the children are different. There are strings, there are dimen and so on. But the format is the same. So, instead of using child.nodeValue when I print value, I get none, which is not so clear. I think it might be this:

Text is always stored in the text node

A common mistake in DOM processing is to assume that the element node contains text.

However, the text of the element node is stored in the text node.

In this case: < year> 2005< /year> , element node < year> , has a text node with a value of “2005”.

“2005” not < year> Element value!

specific reasons hope readers can tell me ha!! Do ⌒ (* ^ – ゜) v

some reference documents are given below:

Python golb methods: http://www.cnblogs.com/hongten/p/hongten_python_glob.html

XML parsing: http://www.cnblogs.com/fnng/p/3581433.html

http://www.runoob.com/python/python-xml.html

Python recursively traverses all files in the directory to find the specified file

before I read someone on the Internet saying “os.path.isdir() determines that absolute path must be written”, I thought Python has an iteration context, why not?Therefore,

is verified in this paper

code section

Consider using a path variable to refer to the current traversal element’s absolute path (correct practice)

def search(root, target):
    items = os.listdir(root)
    for item in items:
        path = os.path.join(root, item)
        if os.path.isdir(path):
            print('[-]', path)
            search(path, target)
        elif path.split('/')[-1] == target:
            print('[+]', path)
        else:
            print('[!]', path)

normal traversal result

if I write it this way, I’m going to replace all the paths with item (iterative element)

def search(root, target):
    items = os.listdir(root)
    for item in items:
        if os.path.isdir(item):
            print('[-]', item)
            search(os.path.join(root, item), target)
        elif item == target:
            print('[+]', os.path.join(root,item))
        else:
            print('[!]', os.path.join(root, item))

can be seen that this does not work: if you use the item to iterate over the current path, you will recursively recursively refer to the current path only two levels further down (subfolders, subfolder files), you can see that the missing context management mechanism is completely ineffective here

reflection and summary

1) how would you react if you just changed the iteration mechanism of the passing element for each recursion based on the correct traversal writing?

In the above example, just change search(os.path.join(root, item), target) to search(item, target) :

this is easy to see because the first time you call search(..) is passed in an absolute path. If the relative path is passed in because there is no context, it cannot correctly locate the location of the file

2)os.path. Abspath (path) method can replace os.path. Join (root, current)?

(fog) original os.path. Abspath (..) means: relative path to the current working directory! (not the relative path to the system root!)

so, os.path. Abspath (..) and OS path. Join (..) is two completely different things that cannot be replaced by


3) what exactly is a Python file?
 again, change each output to use type(..)  function package form 

 let's look at another example 

In line with the Python "everything is an object" belief, the path we enter (including its iteration version, and the variable used to accept it) is a string  STR  object, and the file handle (pointer) used to describe the file is a 
 object

In general, we search and filter folders and files horizontally, using the path description of  STR  type. However, to read or write a specific file (not a folder), we need to use the file handle of _io.TextIOWrapper type. Unlike Java, which is completely encapsulated as a File(String path) object, it reduces the role of the File path as a single individual -- but Python takes the path as a separate one, and many of our File operations (broadly defined as finding a specific File, not just a File IO) are based on the File path as 
4) thinking: is there any way to traverse the file other than the method given at the beginning?
 answer: yes

Chdir (path) os.chdir(path) path

but we do not recommend this method because it modifies the global variable, which is validated before and after the function call using the os.getcwd() function

First switch the working directory to the root directory, then execute the custom change() function

as you can see, by this time the global current working directory has changed

python: File Processing and Input and Output

User input

Save the following program as io_input.py:

def reverse(text): 
    return text[::-1] 
def is_backtext(text): 
    return text == reverse(text) 
something = input("请输入文字: ") 
if is_backtext(something): 
    print("是回文") 
else:
    print("不是回文")

Output:

python io_input.py
输入文字: abc
不是回文

python io_input.py
输入文字: moom
是回文

We use the slice function to flip the text. We have learned that we can slice a sequence from the beginning of position a to the end of position b by using seq[a:b]. We can also provide a third parameter to determine the step size of the slice (Step). The default step size is 1, it will return a continuous text. If a negative step size is given, such as -1, the flipped text will be returned.

The input() function can accept a string as a parameter and display it to the user. After that, it will wait for the user to input content or hit the return key. Once the user enters something and hits the return key, the input() function will return the text entered by the user.

We get the text and flip it. If the original text is the same as the flipped text, it is judged that this text is a palindrome.

file

You can open or use files by creating an object belonging to the file class and using its read, readline, and write methods appropriately , and read or write them. The ability to read or write files depends on how you specify to open the file. Finally, when you have finished the file, you can call the close method to tell Python that we have finished using the file.

Example (save as io_using_file.py):

poem = '''编程是很有趣的事件,
如果你想让你的工作也变得有趣的话：
使用Python！'''

# 打开文件以编辑（'w'riting） 
f = open('poem.txt', 'w',encoding='utf-8') 
# 向文件中编写文本 
f.write(poem) 
# 关闭文件 
f.close()

# 如果没有特别指定， 
# 将假定启用默认的阅读（'r'ead）模式 
f = open('poem.txt') 
while True: 
    line = f.readline() 
    # 零长度指示 EOF 
    if len(line) == 0: 
        break # 每行（`line`）的末尾 
    # 都已经有了换行符 
    #因为它是从一个文件中进行读取的 
    print(line, end='') 
# 关闭文件 
f.close()

Output:

python io_using_file.py
编程是很有趣的事件,
如果你想让你的工作也变得有趣的话：
使用Python！

How it works

First, we use the built-in open function and specify the file name and the open mode we want to use to open a file.

The open mode can be read mode (‘r’), write mode (‘w’) and append mode (‘a’).

We can also choose whether to read, write or append text in text mode (‘t’) or binary mode (‘b’ ). There are actually more modes available, help(open) will give you more details about them.

By default, open() treats the file as a text file and opens it in read mode .

In our case, we first open the file in write mode and use the write method of the file object to write the file, and finally close the file with close.

Next, we reopen the same file in reading mode. We don’t need to specify a certain mode, because “read text file” is the default. We use the readline method in the loop to read each line of the file. This method will be a complete line, which also contains a newline at the end of the line. When an empty string returns, it means that we have reached the end of the file and exit the loop with break

Finally, we finally closed the file with close. Now, you can check the content of the poem.txt file to confirm that the program has indeed written and read the file.

encoding=utf-8

When we read or write a file or when we want to communicate with other computers on the Internet, we need to convert our Unicode string to a format that can be sent and received. This format is called == “UTF- 8″==. We can read and write in this format, just use a simple keyword parameter to our standard open function: encoding=’utf-8′

Unicode has multiple translations such as “Unicode”, “Universal Code” and “International Code”.

Python traverses all files under the specified path and retrieves them according to the time interval

demand

It is required to find the word documents in a certain date range in the folder, and list the names and paths of all words, such as all word documents under the D drive from July 5 to July 31.

Modify file type

Modify file path

Retrieve file modification time interval

#conding=utf8
import os
import time
g = os.walk(r"C:\Users\Administrator\Downloads")
def judge_time_file(path, file, update_time):
    if not file.endswith(('.doc','.docx')):
        return False
    start_time = time.mktime(time.strptime('2020-04-12 00:00:00', "%Y-%m-%d %H:%M:%S"))
    end_time   = time.mktime(time.strptime('2020-05-23 00:00:00', "%Y-%m-%d %H:%M:%S"))
    # print(start_time ,  update_time , end_time)
    if start_time < update_time < end_time:
        return True
    return True
 
data_list = []
i = 0
 
for path, dir_list, file_list in g:
 
    for file_name in file_list:
        local_time = os.stat(os.path.join(path, file_name)).st_mtime
        if judge_time_file(path, file_name, local_time):
            data_list.append([os.path.join(path, file_name), time.strftime("%Y-%m-%d %H:%M:%S",time.localtime(local_time))])
            # print(data_list)
        i=i+1
        print(i)
data_list.sort(key=lambd

ProgrammerAH

Programmer Guide, Tips and Tutorial