XPath Support¶
pygixml provides full XPath 1.0 support through pugixml’s powerful XPath implementation.
Basic XPath Usage¶
Selecting Nodes¶
import pygixml
xml_string = '''
<library>
<book id="1" category="fiction">
<title>The Great Gatsby</title>
<author>F. Scott Fitzgerald</author>
<year>1925</year>
<price>12.99</price>
</book>
<book id="2" category="fiction">
<title>1984</title>
<author>George Orwell</author>
<year>1949</year>
<price>10.99</price>
</book>
<book id="3" category="non-fiction">
<title>A Brief History of Time</title>
<author>Stephen Hawking</author>
<year>1988</year>
<price>15.99</price>
</book>
</library>
'''
doc = pygixml.parse_string(xml_string)
root = doc.first_child()
# Select all books
books = root.select_nodes("book")
print(f"Found {len(books)} books")
# Select single book
book = root.select_node("book[@id='1']")
if book:
    print(f"Book 1: {book.node().child('title').child_value()}")
# Select by attribute
fiction_books = root.select_nodes("book[@category='fiction']")
print(f"Found {len(fiction_books)} fiction books")
XPath Query Object¶
For repeated queries, use XPathQuery for better performance:
# Create XPath query once, use multiple times
fiction_query = pygixml.XPathQuery("book[@category='fiction']")
expensive_query = pygixml.XPathQuery("book[price > 12]")
fiction_books = fiction_query.evaluate_node_set(root)
expensive_books = expensive_query.evaluate_node_set(root)
print(f"Fiction books: {len(fiction_books)}")
print(f"Expensive books: {len(expensive_books)}")
XPath Evaluation Types¶
Boolean Evaluation¶
# Check if any books exist
has_books = pygixml.XPathQuery("book").evaluate_boolean(root)
print(f"Has books: {has_books}") # Output: True
# Check if there are expensive books
has_expensive = pygixml.XPathQuery("book[price > 20]").evaluate_boolean(root)
print(f"Has expensive books: {has_expensive}") # Output: False
Number Evaluation¶
# Get average price
avg_price = pygixml.XPathQuery("sum(book/price) div count(book)").evaluate_number(root)
print(f"Average price: ${avg_price:.2f}")
# Get total books
total_books = pygixml.XPathQuery("count(book)").evaluate_number(root)
print(f"Total books: {total_books}")
String Evaluation¶
# Get first book title
first_title = pygixml.XPathQuery("book[1]/title").evaluate_string(root)
print(f"First title: {first_title}")
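# Get all titles concatenated (XPath 1.0 has no string-join(); use concat() with explicit operands)
all_titles = pygixml.XPathQuery("concat(book[1]/title, ', ', book[2]/title, ', ', book[3]/title)").evaluate_string(root)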
print(f"All titles: {all_titles}")
Advanced XPath Features¶
Positional Functions¶
# First book
first_book = root.select_node("book[1]")
# Last book
last_book = root.select_node("book[last()]")
# Books in specific positions
second_book = root.select_node("book[2]")
first_two_books = root.select_nodes("book[position() <= 2]")
Text and Value Selection¶
# Select by text content
gatsby = root.select_node("book[title='The Great Gatsby']")
# Select by partial text
history_books = root.select_nodes("book[contains(title, 'History')]")
# Select by numeric comparison
old_books = root.select_nodes("book[year < 1950]")
expensive_books = root.select_nodes("book[price > 12]")
Complex Expressions¶
# Multiple conditions
old_fiction = root.select_nodes("book[@category='fiction' and year < 1950]")
# Union of selections
fiction_or_expensive = root.select_nodes("book[@category='fiction'] | book[price > 14]")
# Nested selections
authors = root.select_nodes("book/author")
for author in authors:
    print(f"Author: {author.node().child_value()}")
XPath Axes¶
Child Axis¶
# All direct children named 'book'
books = root.select_nodes("child::book")
# All children (any name)
all_children = root.select_nodes("child::*")
Attribute Axis¶
# All attributes
all_attributes = root.select_nodes("book/@*")
# Specific attribute
ids = root.select_nodes("book/@id")
Descendant Axis¶
# All descendant titles (at any level)
all_titles = root.select_nodes("descendant::title")
# Titles that are grandchildren of the context node (any child element, then title)
grandchild_titles = root.select_nodes("*/title")
XPath Functions¶
String Functions¶
# Contains
contains_gatsby = root.select_nodes("book[contains(title, 'Gatsby')]")
# Starts with
starts_with_the = root.select_nodes("book[starts-with(title, 'The')]")
# String length
long_titles = root.select_nodes("book[string-length(title) > 15]")
# Substring
substring_books = root.select_nodes("book[substring(title, 1, 3) = 'The']")
Number Functions¶
# Round
rounded_price = pygixml.XPathQuery("round(book[1]/price)").evaluate_number(root)
# Floor and ceiling
floor_price = pygixml.XPathQuery("floor(book[1]/price)").evaluate_number(root)
ceil_price = pygixml.XPathQuery("ceiling(book[1]/price)").evaluate_number(root)
Node Set Functions¶
# Count
book_count = pygixml.XPathQuery("count(book)").evaluate_number(root)
# Position
first_book = root.select_node("book[position() = 1]")
# Last position
last_book = root.select_node("book[position() = last()]")
Performance Tips¶
Use XPathQuery for repeated queries - Compile once, use many times
Be specific in your paths - Avoid wildcards when possible
Use attributes for filtering - Attribute comparisons are faster than text comparisons
Limit result sets - Use positional predicates to limit results
Common XPath Patterns¶
# Find elements with specific attribute
elements_with_id = root.select_nodes("//*[@id]")
# Find elements with specific text
elements_with_text = root.select_nodes("//*[text()='specific text']")
# Find parent of specific element (path is relative to the context node, here <library>)
parent_of_title = root.select_node("book/title/..")
# Find siblings
next_sibling = root.select_node("book[1]/following-sibling::book[1]")
# Find ancestors (path is relative to the context node, here <library>)
ancestors = root.select_nodes("book/title/ancestor::*")
Supported XPath 1.0 Features¶
All core XPath 1.0 axes:
child, descendant, parent, ancestor, following-sibling, preceding-sibling, following, preceding, attribute, namespace, self, descendant-or-self, ancestor-or-self
All XPath 1.0 functions:
string, number, boolean, concat, contains, starts-with, substring, substring-before, substring-after, string-length, normalize-space, translate, not, true, false, lang, sum, floor, ceiling, round, position, last, count, local-name, namespace-uri, name
Full boolean and comparison operators
Complete numeric operations
String operations and comparisons