7.4 - Data Handling and Manipulation
Effective data handling and manipulation are essential skills for C# developers. This chapter explores advanced techniques for querying, transforming, and processing data using LINQ, working with common data formats like XML and JSON, and implementing efficient serialization and deserialization strategies.
7.4.1 - LINQ Fundamentals
Language Integrated Query (LINQ) provides a unified syntax for querying various data sources, from in-memory collections to databases and XML.
7.4.1.1 - LINQ Architecture
LINQ consists of several components:
- Standard Query Operators: Extension methods that form the LINQ pattern
- Query Expressions: C# language syntax for writing queries
- LINQ Providers: Implementations that translate LINQ queries for specific data sources
// The basic LINQ architecture
IEnumerable<int> numbers = new[] { 1, 2, 3, 4, 5 };
// Using standard query operators (method syntax)
var evenNumbers = numbers.Where(n => n % 2 == 0);
// Using query expressions (query syntax)
var evenNumbersQuery = from n in numbers
where n % 2 == 0
select n;
7.4.1.2 - Deferred Execution
LINQ queries are typically executed when the results are enumerated, not when the query is defined:
// Create a data source
List<int> numbers = new List<int> { 1, 2, 3 };
// Define a query
var query = numbers.Where(n => n > 1);
// The query is not executed yet
numbers.Add(4); // Modifying the source
// Now the query executes, including the new element
foreach (var n in query) // Output: 2, 3, 4
{
Console.WriteLine(n);
}
// Force immediate execution with ToList(), ToArray(), etc.
var immediateResults = numbers.Where(n => n > 1).ToList();
numbers.Add(5); // This won't affect immediateResults
7.4.1.3 - Common LINQ Operators
var numbers = Enumerable.Range(1, 10);
var words = new[] { "apple", "banana", "cherry", "date", "elderberry" };
// Filtering
var evenNumbers = numbers.Where(n => n % 2 == 0); // 2, 4, 6, 8, 10
// Projection
var squares = numbers.Select(n => n * n); // 1, 4, 9, 16, 25, ...
// Ordering
var descendingNumbers = numbers.OrderByDescending(n => n); // 10, 9, 8, ...
var alphabetical = words.OrderBy(w => w); // apple, banana, cherry, ...
// Grouping
var groupedByLength = words.GroupBy(w => w.Length);
// Joining
var people = new[] {
new { Id = 1, Name = "John" },
new { Id = 2, Name = "Jane" }
};
var orders = new[] {
new { PersonId = 1, Product = "Book" },
new { PersonId = 1, Product = "Pen" },
new { PersonId = 2, Product = "Notebook" }
};
var peopleWithOrders = people.Join(
orders,
person => person.Id,
order => order.PersonId,
(person, order) => new { person.Name, order.Product }
);
// Aggregation
int sum = numbers.Sum(); // 55
int max = numbers.Max(); // 10
double average = numbers.Average(); // 5.5
int count = numbers.Count(); // 10
// Element operations
int first = numbers.First(); // 1
int last = numbers.Last(); // 10
int fifth = numbers.ElementAt(4); // 5
// Quantifiers
bool anyEven = numbers.Any(n => n % 2 == 0); // true
bool allPositive = numbers.All(n => n > 0); // true
// Set operations
var set1 = new[] { 1, 2, 3 };
var set2 = new[] { 3, 4, 5 };
var union = set1.Union(set2); // 1, 2, 3, 4, 5
var intersection = set1.Intersect(set2); // 3
var difference = set1.Except(set2); // 1, 2
// Partitioning
var firstThree = numbers.Take(3); // 1, 2, 3
var skipThree = numbers.Skip(3); // 4, 5, 6, 7, 8, 9, 10
7.4.1.4 - Custom LINQ Operators
You can extend LINQ with your own operators:
/// <summary>Custom LINQ-style extension operators.</summary>
public static class LinqExtensions
{
    /// <summary>
    /// Returns every nth element of <paramref name="source"/>, starting with the first.
    /// </summary>
    /// <param name="source">The sequence to sample; must not be null.</param>
    /// <param name="n">The sampling interval; must be positive.</param>
    /// <returns>A lazily evaluated sequence of the elements at indices 0, n, 2n, ...</returns>
    /// <exception cref="ArgumentNullException">Thrown when <paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">Thrown when <paramref name="n"/> is not positive.</exception>
    public static IEnumerable<T> EveryNth<T>(this IEnumerable<T> source, int n)
    {
        // Validate eagerly so the exception surfaces at call time rather than
        // on the first enumeration of the deferred iterator below.
        if (source == null) throw new ArgumentNullException(nameof(source));
        if (n <= 0) throw new ArgumentOutOfRangeException(nameof(n));
        return EveryNthIterator(source, n);
    }
    private static IEnumerable<T> EveryNthIterator<T>(IEnumerable<T> source, int n)
    {
        int position = 0;
        foreach (var element in source)
        {
            // Yield whenever the zero-based position is a multiple of n.
            if (position++ % n == 0)
            {
                yield return element;
            }
        }
    }
}
// Usage
var numbers = Enumerable.Range(1, 20);
var everyThird = numbers.EveryNth(3); // 1, 4, 7, 10, 13, 16, 19
7.4.2 - LINQ Query Syntax vs. Method Syntax
LINQ provides two syntaxes for writing queries: query syntax and method syntax.
7.4.2.1 - Query Syntax
Query syntax resembles SQL and is often more readable for complex queries:
// Basic query syntax
var query = from n in numbers
where n % 2 == 0
orderby n descending
select n * n;
// Query with multiple from clauses (cross join)
var crossJoin = from p in people
from o in orders
select new { p.Name, o.Product };
// Query with join
var joinQuery = from p in people
join o in orders on p.Id equals o.PersonId
select new { p.Name, o.Product };
// Query with group by
var groupQuery = from w in words
group w by w.Length into lengthGroup
orderby lengthGroup.Key
select new { Length = lengthGroup.Key, Words = lengthGroup.ToList() };
// Query with let (introducing a new variable)
var letQuery = from w in words
let length = w.Length
where length > 5
orderby length
select new { Word = w, Length = length };
7.4.2.2 - Method Syntax
Method syntax uses extension methods and lambda expressions, offering more flexibility and access to all LINQ operators:
// Basic method syntax
var query = numbers
.Where(n => n % 2 == 0)
.OrderByDescending(n => n)
.Select(n => n * n);
// Cross join
var crossJoin = people
.SelectMany(p => orders, (p, o) => new { p.Name, o.Product });
// Join
var joinQuery = people
.Join(
orders,
p => p.Id,
o => o.PersonId,
(p, o) => new { p.Name, o.Product }
);
// Group by
var groupQuery = words
.GroupBy(w => w.Length)
.OrderBy(g => g.Key)
.Select(g => new { Length = g.Key, Words = g.ToList() });
// Introducing a new variable (no direct equivalent to 'let')
var letQuery = words
.Select(w => new { Word = w, Length = w.Length })
.Where(x => x.Length > 5)
.OrderBy(x => x.Length);
7.4.2.3 - Mixing Syntaxes
You can mix query and method syntax:
// Start with query syntax, end with method syntax
var mixedQuery = (from w in words
where w.Length > 3
select w)
.OrderBy(w => w)
.Take(3);
// Use method syntax for operations not supported in query syntax
var complexQuery = from p in people
join o in orders on p.Id equals o.PersonId
group o by p into customerOrders
where customerOrders.Count() > 1
select new {
Customer = customerOrders.Key.Name,
OrderCount = customerOrders.Count()
};
7.4.2.4 - When to Use Each Syntax
- Query Syntax: More readable for complex queries with multiple operations like joins and grouping
- Method Syntax: More concise for simple queries and provides access to all LINQ operators
7.4.3 - LINQ to Objects
LINQ to Objects operates on in-memory collections that implement IEnumerable<T>.
7.4.3.1 - Working with Collections
// Working with arrays
int[] numbers = { 1, 2, 3, 4, 5 };
var evenNumbers = numbers.Where(n => n % 2 == 0);
// Working with lists
List<string> fruits = new List<string> { "apple", "banana", "cherry" };
var longFruits = fruits.Where(f => f.Length > 5);
// Working with dictionaries
Dictionary<string, int> ages = new Dictionary<string, int>
{
{ "John", 30 },
{ "Jane", 25 },
{ "Bob", 40 }
};
var youngPeople = ages.Where(pair => pair.Value < 30)
.Select(pair => pair.Key);
// Working with custom collections
var employees = new List<Employee>
{
new Employee { Id = 1, Name = "John", Department = "IT", Salary = 50000 },
new Employee { Id = 2, Name = "Jane", Department = "HR", Salary = 60000 },
new Employee { Id = 3, Name = "Bob", Department = "IT", Salary = 55000 }
};
var itEmployees = employees.Where(e => e.Department == "IT");
var averageSalary = employees.Average(e => e.Salary);
var departmentGroups = employees.GroupBy(e => e.Department);
7.4.3.2 - Performance Considerations
// Avoid multiple enumerations
var numbers = Enumerable.Range(1, 1000000);
// Bad: Enumerates twice
// NOTE(review): when only emptiness matters, numbers.Any() is cheaper than
// Count() > 0 because it stops after the first element.
if (numbers.Count() > 0 && numbers.Any(n => n < 0))
{
// Do something
}
// Good: Enumerate once and cache results when needed
// (caching trades memory for speed: ToList() materializes all one million elements)
var numbersList = numbers.ToList();
if (numbersList.Count > 0 && numbersList.Any(n => n < 0))
{
// Do something
}
// Use appropriate methods
// Bad: Unnecessarily loads all elements
var firstNumber = numbers.ToList().FirstOrDefault();
// Good: Stops at the first element
var firstNumberEfficient = numbers.FirstOrDefault();
// Use specialized methods
// Less efficient
var containsFive = numbers.Any(n => n == 5);
// More efficient for sets
// (worth it when many membership checks amortize the one-time set construction)
var set = new HashSet<int>(numbers);
var containsFiveEfficient = set.Contains(5);
7.4.3.3 - Lazy Evaluation and Streaming
// Generate an infinite sequence
IEnumerable<int> InfiniteSequence()
{
int i = 0;
while (true)
{
yield return i++;
}
}
// Thanks to lazy evaluation, we can work with "infinite" sequences
var infinite = InfiniteSequence();
var first10 = infinite.Take(10); // Only takes what we need
// Process large data streams efficiently
IEnumerable<string> ReadLargeFile(string path)
{
using (var reader = new StreamReader(path))
{
string line;
while ((line = reader.ReadLine()) != null)
{
yield return line;
}
}
}
// Process the file line by line without loading it all into memory
var longLines = ReadLargeFile("large.txt")
.Where(line => line.Length > 100)
.Take(10);
7.4.4 - LINQ to XML
LINQ to XML provides a modern, LINQ-friendly way to work with XML documents.
7.4.4.1 - Creating XML Documents
// Create an XML document
XDocument document = new XDocument(
new XDeclaration("1.0", "utf-8", "yes"),
new XComment("This is a sample XML document"),
new XElement("Root",
new XElement("Person",
new XAttribute("Id", "1"),
new XElement("Name", "John Doe"),
new XElement("Age", 30),
new XElement("Address",
new XElement("Street", "123 Main St"),
new XElement("City", "Anytown"),
new XElement("ZipCode", "12345")
)
),
new XElement("Person",
new XAttribute("Id", "2"),
new XElement("Name", "Jane Smith"),
new XElement("Age", 25),
new XElement("Address",
new XElement("Street", "456 Oak Ave"),
new XElement("City", "Somewhere"),
new XElement("ZipCode", "67890")
)
)
)
);
// Save the document
document.Save("people.xml");
7.4.4.2 - Querying XML Documents
// Load an XML document
XDocument doc = XDocument.Load("people.xml");
// Query for all person names
var names = from person in doc.Root.Elements("Person")
select person.Element("Name").Value;
// Query with filtering
var youngPeople = from person in doc.Root.Elements("Person")
let age = (int)person.Element("Age")
where age < 30
select new {
Name = person.Element("Name").Value,
Age = age
};
// Query with element and attribute access
var peopleWithAddresses = from person in doc.Root.Elements("Person")
select new {
Id = (string)person.Attribute("Id"),
Name = (string)person.Element("Name"),
City = (string)person.Element("Address").Element("City")
};
// Using method syntax
var cities = doc.Root
.Elements("Person")
.Elements("Address")
.Elements("City")
.Select(city => city.Value)
.Distinct();
7.4.4.3 - Modifying XML Documents
// Load the document
XDocument doc = XDocument.Load("people.xml");
// Add a new person
doc.Root.Add(
new XElement("Person",
new XAttribute("Id", "3"),
new XElement("Name", "Bob Johnson"),
new XElement("Age", 35),
new XElement("Address",
new XElement("Street", "789 Pine St"),
new XElement("City", "Elsewhere"),
new XElement("ZipCode", "54321")
)
)
);
// Modify an existing element
XElement person = doc.Root.Elements("Person")
.FirstOrDefault(p => (string)p.Attribute("Id") == "1");
if (person != null)
{
person.Element("Age").Value = "31";
// Add a new element
person.Add(new XElement("Phone", "555-1234"));
}
// Remove elements
var peopleToRemove = doc.Root.Elements("Person")
.Where(p => (int)p.Element("Age") > 30);
foreach (var p in peopleToRemove.ToList()) // ToList to avoid modification during enumeration
{
p.Remove();
}
// Save the modified document
doc.Save("modified_people.xml");
7.4.4.4 - Working with Namespaces
// Create a document with namespaces
XNamespace ns = "http://example.com/people";
XNamespace addressNs = "http://example.com/address";
XDocument doc = new XDocument(
new XElement(ns + "People",
new XAttribute(XNamespace.Xmlns + "addr", addressNs),
new XElement(ns + "Person",
new XAttribute("Id", "1"),
new XElement(ns + "Name", "John Doe"),
new XElement(addressNs + "Address",
new XElement(addressNs + "City", "Anytown")
)
)
)
);
// Query with namespaces
var cities = from person in doc.Root.Elements(ns + "Person")
select (string)person.Element(addressNs + "Address")
.Element(addressNs + "City");
7.4.5 - LINQ to SQL and Entity Framework
LINQ to SQL (a legacy technology, now superseded) and Entity Framework provide LINQ interfaces for querying relational databases; Entity Framework Core is the recommended choice for new development, and the examples below use it.
7.4.5.1 - Entity Framework Core Basics
// Define a DbContext
// EF Core database context exposing the Customers and Orders tables.
public class ApplicationDbContext : DbContext
{
// Each DbSet maps to a table and is the root for LINQ queries.
public DbSet<Customer> Customers { get; set; }
public DbSet<Order> Orders { get; set; }
protected override void OnConfiguring(DbContextOptionsBuilder optionsBuilder)
{
// NOTE(review): connection string is hard-coded for illustration only;
// real code should load it from configuration, not source.
optionsBuilder.UseSqlServer("Server=.;Database=MyDb;Trusted_Connection=True;");
}
}
// Define entity classes
// Entity mapped to the Customers table.
public class Customer
{
public int Id { get; set; }
public string Name { get; set; }
public string Email { get; set; }
// Navigation property: the customer's orders (populated e.g. via Include).
public List<Order> Orders { get; set; }
}
// Entity mapped to the Orders table.
public class Order
{
public int Id { get; set; }
public DateTime OrderDate { get; set; }
public decimal TotalAmount { get; set; }
// Foreign key plus navigation property back to the owning customer.
public int CustomerId { get; set; }
public Customer Customer { get; set; }
}
// Basic LINQ queries with Entity Framework
using (var context = new ApplicationDbContext())
{
// Get all customers
var customers = context.Customers.ToList();
// Filter customers
var premiumCustomers = context.Customers
.Where(c => c.Orders.Sum(o => o.TotalAmount) > 1000)
.ToList();
// Join data
var customerOrders = context.Customers
.Join(
context.Orders,
customer => customer.Id,
order => order.CustomerId,
(customer, order) => new { customer.Name, order.OrderDate, order.TotalAmount }
)
.ToList();
// Include related data
var customersWithOrders = context.Customers
.Include(c => c.Orders)
.ToList();
// Aggregation
var orderStats = context.Orders
.GroupBy(o => o.CustomerId)
.Select(g => new {
CustomerId = g.Key,
OrderCount = g.Count(),
TotalSpent = g.Sum(o => o.TotalAmount),
AverageOrderAmount = g.Average(o => o.TotalAmount)
})
.ToList();
}
7.4.5.2 - Query Translation and Optimization
using (var context = new ApplicationDbContext())
{
    // Log generated SQL.
    // (context.Database.Log = Console.WriteLine is the legacy EF6 API and does
    // not exist in EF Core; in EF Core configure logging in OnConfiguring with
    // optionsBuilder.LogTo(Console.WriteLine).)

    // Query that translates to efficient SQL
    var recentOrders = context.Orders
        .Where(o => o.OrderDate > DateTime.Now.AddDays(-30))
        .OrderByDescending(o => o.TotalAmount)
        .Take(10)
        .ToList();
    // Inefficient query: a custom method cannot be translated to SQL.
    // EF Core 3.0+ throws InvalidOperationException here rather than silently
    // evaluating the predicate on the client.
    var inefficientQuery = context.Customers
        .Where(c => IsValidEmail(c.Email))
        .ToList();
    // More efficient approach: run the SQL part first, then stream the rows
    // through the in-memory filter. AsEnumerable() streams results instead of
    // buffering the whole table into a list the way ToList() would.
    var efficientQuery = context.Customers
        .AsEnumerable()
        .Where(c => IsValidEmail(c.Email));
    // Compiled queries skip the LINQ-to-SQL translation cost on repeated execution.
    var compiledQuery = EF.CompileQuery(
        (ApplicationDbContext ctx, DateTime date) =>
            ctx.Orders
                .Where(o => o.OrderDate > date)
                .OrderByDescending(o => o.TotalAmount)
                .Take(10)
    );
    // Execute the compiled query with different parameters.
    var result1 = compiledQuery(context, DateTime.Now.AddDays(-30));
    var result2 = compiledQuery(context, DateTime.Now.AddDays(-60));
}
// Returns true when the e-mail address passes this (intentionally minimal)
// validity check: non-null and containing an '@' character.
private bool IsValidEmail(string email)
{
    if (email == null)
    {
        return false;
    }
    return email.Contains("@");
}
7.4.5.3 - Advanced Entity Framework Queries
using (var context = new ApplicationDbContext())
{
// Raw SQL queries
var customers = context.Customers
.FromSqlRaw("SELECT * FROM Customers WHERE Region = 'North'")
.ToList();
// SQL interpolation (safe from SQL injection)
string region = "North";
var safeCustomers = context.Customers
.FromSqlInterpolated($"SELECT * FROM Customers WHERE Region = {region}")
.ToList();
// Combining LINQ and raw SQL
var filteredCustomers = context.Customers
.FromSqlRaw("SELECT * FROM Customers")
.Where(c => c.Orders.Any(o => o.TotalAmount > 1000))
.ToList();
// Complex projections
var customerSummaries = context.Customers
.Select(c => new {
c.Id,
c.Name,
OrderCount = c.Orders.Count,
TotalSpent = c.Orders.Sum(o => o.TotalAmount),
LatestOrderDate = c.Orders.Max(o => o.OrderDate),
TopProducts = c.Orders
.SelectMany(o => o.OrderItems)
.GroupBy(i => i.ProductId)
.OrderByDescending(g => g.Sum(i => i.Quantity))
.Take(3)
.Select(g => g.Key)
.ToList()
})
.ToList();
// Pagination
int pageSize = 10;
int pageNumber = 2;
var pagedCustomers = context.Customers
.OrderBy(c => c.Name)
.Skip((pageNumber - 1) * pageSize)
.Take(pageSize)
.ToList();
}
7.4.6 - XML Processing
Beyond LINQ to XML, C# provides additional tools for working with XML.
7.4.6.1 - XmlReader and XmlWriter
XmlReader and XmlWriter provide fast, forward-only access to XML data:
// Reading XML with XmlReader
// Streams the file node by node (forward-only, non-cached), printing every
// element, its attributes, text content, and end tags as they are encountered.
public void ReadXmlWithReader(string filePath)
{
using (XmlReader reader = XmlReader.Create(filePath))
{
while (reader.Read())
{
switch (reader.NodeType)
{
case XmlNodeType.Element:
Console.WriteLine($"Element: {reader.Name}");
// Read attributes
if (reader.HasAttributes)
{
while (reader.MoveToNextAttribute())
{
Console.WriteLine($" Attribute: {reader.Name}={reader.Value}");
}
// Return the cursor to the element after iterating its attributes,
// so subsequent Read() calls continue from the right position.
reader.MoveToElement();
}
break;
case XmlNodeType.Text:
Console.WriteLine($"Text: {reader.Value}");
break;
case XmlNodeType.EndElement:
Console.WriteLine($"End Element: {reader.Name}");
break;
}
}
}
}
// Writing XML with XmlWriter
// Emits a small indented XML document (Root > Person > Address) to filePath.
public void WriteXmlWithWriter(string filePath)
{
    var settings = new XmlWriterSettings { Indent = true, IndentChars = " " };
    using (XmlWriter writer = XmlWriter.Create(filePath, settings))
    {
        writer.WriteStartDocument();
        writer.WriteStartElement("Root");
        writer.WriteStartElement("Person");
        writer.WriteAttributeString("Id", "1");
        writer.WriteElementString("Name", "John Doe");
        writer.WriteElementString("Age", "30");
        writer.WriteStartElement("Address");
        writer.WriteElementString("Street", "123 Main St");
        writer.WriteElementString("City", "Anytown");
        writer.WriteEndElement(); // </Address>
        writer.WriteEndElement(); // </Person>
        writer.WriteEndElement(); // </Root>
        writer.WriteEndDocument();
    }
}
7.4.6.2 - XmlDocument and XPath
XmlDocument provides a DOM-based API for XML manipulation, and XPath allows for powerful queries:
// Load and query XML with XmlDocument and XPath
// Loads the whole file into a DOM, runs several XPath queries over it,
// appends a Phone element to the first person, and saves a modified copy.
public void XmlDocumentExample(string filePath)
{
XmlDocument doc = new XmlDocument();
doc.Load(filePath);
// Get all person elements
// ("//Person" searches the entire document, at any depth)
XmlNodeList persons = doc.SelectNodes("//Person");
foreach (XmlNode person in persons)
{
string name = person.SelectSingleNode("Name").InnerText;
string age = person.SelectSingleNode("Age").InnerText;
Console.WriteLine($"Name: {name}, Age: {age}");
}
// Get persons with age > 25
XmlNodeList olderPersons = doc.SelectNodes("//Person[Age > 25]");
Console.WriteLine($"Found {olderPersons.Count} persons older than 25");
// Get the first person's city
// (XPath positions are 1-based: [1] is the first Person under its parent)
XmlNode city = doc.SelectSingleNode("//Person[1]/Address/City");
if (city != null)
{
Console.WriteLine($"First person's city: {city.InnerText}");
}
// Modify the document
XmlNode firstPerson = doc.SelectSingleNode("//Person[1]");
// New elements must be created through the owning document before appending.
XmlElement phoneElement = doc.CreateElement("Phone");
phoneElement.InnerText = "555-1234";
firstPerson.AppendChild(phoneElement);
// Save the modified document
doc.Save("modified.xml");
}
7.4.6.3 - XML Serialization
XML serialization converts objects to XML and back:
// Classes for serialization
// Serialization model: attributes control the XML shape produced by XmlSerializer.
[XmlRoot("Person")]
public class Person
{
// Serialized as an attribute on the root element rather than a child element.
[XmlAttribute("Id")]
public int Id { get; set; }
[XmlElement("Name")]
public string Name { get; set; }
[XmlElement("Age")]
public int Age { get; set; }
[XmlElement("Address")]
public Address Address { get; set; }
// Wraps each item in <Phone> inside a <PhoneNumbers> container element.
[XmlArray("PhoneNumbers")]
[XmlArrayItem("Phone")]
public List<string> PhoneNumbers { get; set; }
// Excluded from serialization entirely.
[XmlIgnore]
public string InternalNotes { get; set; }
}
// Nested address record; serialized with default element names matching the properties.
public class Address
{
public string Street { get; set; }
public string City { get; set; }
public string ZipCode { get; set; }
}
// Serialize an object to XML
// Writes the given Person to filePath as an XML document via XmlSerializer.
public void SerializeToXml(Person person, string filePath)
{
    var serializer = new XmlSerializer(typeof(Person));
    using (var output = new StreamWriter(filePath))
    {
        serializer.Serialize(output, person);
    }
}
// Deserialize XML to an object
// Reads filePath and reconstructs the Person it contains.
public Person DeserializeFromXml(string filePath)
{
    var serializer = new XmlSerializer(typeof(Person));
    using (var input = new StreamReader(filePath))
    {
        return (Person)serializer.Deserialize(input);
    }
}
// Usage
// Builds a sample Person, round-trips it through person.xml, and prints the result.
public void XmlSerializationExample()
{
Person person = new Person
{
Id = 1,
Name = "John Doe",
Age = 30,
Address = new Address
{
Street = "123 Main St",
City = "Anytown",
ZipCode = "12345"
},
PhoneNumbers = new List<string> { "555-1234", "555-5678" },
// Dropped by [XmlIgnore] on the property, so absent after the round trip.
InternalNotes = "This won't be serialized"
};
SerializeToXml(person, "person.xml");
Person deserializedPerson = DeserializeFromXml("person.xml");
Console.WriteLine($"Deserialized: {deserializedPerson.Name}, {deserializedPerson.Age}");
}
7.4.7 - JSON Processing
JSON (JavaScript Object Notation) is a lightweight data interchange format widely used in web applications.
7.4.7.1 - System.Text.Json
The System.Text.Json namespace (introduced in .NET Core 3.0) provides high-performance JSON processing:
// Classes for JSON serialization
// JSON model for System.Text.Json; property names map directly unless overridden.
public class Person
{
public int Id { get; set; }
public string Name { get; set; }
public int Age { get; set; }
public Address Address { get; set; }
public List<string> PhoneNumbers { get; set; }
// Never written to or read from JSON.
[JsonIgnore]
public string InternalNotes { get; set; }
}
public class Address
{
public string Street { get; set; }
public string City { get; set; }
// Serialized under the shorter JSON name "zip".
[JsonPropertyName("zip")]
public string ZipCode { get; set; }
}
// Serialize an object to JSON
// Converts a Person into an indented, camelCase JSON string.
public string SerializeToJson(Person person)
{
    return JsonSerializer.Serialize(person, new JsonSerializerOptions
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = true
    });
}
// Deserialize JSON to an object
// Parses JSON into a Person, matching property names case-insensitively.
public Person DeserializeFromJson(string json)
{
    return JsonSerializer.Deserialize<Person>(json, new JsonSerializerOptions
    {
        PropertyNameCaseInsensitive = true
    });
}
// Reading and writing JSON files
// Serializes the Person with indented camelCase JSON and writes it to filePath.
public void SaveToJsonFile(Person person, string filePath)
{
    var options = new JsonSerializerOptions
    {
        PropertyNamingPolicy = JsonNamingPolicy.CamelCase,
        WriteIndented = true
    };
    File.WriteAllText(filePath, JsonSerializer.Serialize(person, options));
}
// Reads filePath and deserializes its JSON content into a Person
// using default serializer options.
public Person LoadFromJsonFile(string filePath)
{
    return JsonSerializer.Deserialize<Person>(File.ReadAllText(filePath));
}
7.4.7.2 - Working with JSON Documents
// Parse and manipulate JSON documents
// Walks a parsed JSON document and prints the name, each phone number,
// and the nested city, when those properties are present.
public void JsonDocumentExample(string json)
{
    using (JsonDocument parsed = JsonDocument.Parse(json))
    {
        JsonElement top = parsed.RootElement;
        // Scalar property access.
        if (top.TryGetProperty("name", out JsonElement nameProp))
        {
            string name = nameProp.GetString();
            Console.WriteLine($"Name: {name}");
        }
        // Array enumeration.
        if (top.TryGetProperty("phoneNumbers", out JsonElement phones))
        {
            foreach (JsonElement entry in phones.EnumerateArray())
            {
                string phoneNumber = entry.GetString();
                Console.WriteLine($"Phone: {phoneNumber}");
            }
        }
        // Nested object access, short-circuiting if either level is missing.
        if (top.TryGetProperty("address", out JsonElement addr)
            && addr.TryGetProperty("city", out JsonElement cityProp))
        {
            Console.WriteLine($"City: {cityProp.GetString()}");
        }
    }
}
// Create JSON using JsonDocument
// Builds a small JSON object by hand with Utf8JsonWriter and returns it as a string.
public string CreateJsonDocument()
{
    using (MemoryStream buffer = new MemoryStream())
    {
        using (Utf8JsonWriter json = new Utf8JsonWriter(buffer, new JsonWriterOptions { Indented = true }))
        {
            json.WriteStartObject();
            json.WriteNumber("id", 1);
            json.WriteString("name", "John Doe");
            json.WriteNumber("age", 30);
            // Nested object.
            json.WriteStartObject("address");
            json.WriteString("street", "123 Main St");
            json.WriteString("city", "Anytown");
            json.WriteString("zip", "12345");
            json.WriteEndObject();
            // Array of strings.
            json.WriteStartArray("phoneNumbers");
            json.WriteStringValue("555-1234");
            json.WriteStringValue("555-5678");
            json.WriteEndArray();
            json.WriteEndObject();
        } // Disposing the writer flushes any buffered output into the stream.
        return Encoding.UTF8.GetString(buffer.ToArray());
    }
}
7.4.7.3 - JSON Serialization Options
// Configure JSON serialization
// Demonstrates the main JsonSerializerOptions knobs by serializing the same
// Person with different settings. SnakeCaseNamingPolicy and DateTimeConverter
// are user-defined types.
// NOTE(review): in real code, cache and reuse JsonSerializerOptions instances;
// creating a new one per call defeats the serializer's metadata caching.
public void JsonSerializationOptions()
{
Person person = new Person
{
Id = 1,
Name = "John Doe",
Age = 30,
Address = new Address
{
Street = "123 Main St",
City = "Anytown",
ZipCode = "12345"
},
PhoneNumbers = new List<string> { "555-1234", "555-5678" }
};
// Default options
string defaultJson = JsonSerializer.Serialize(person);
// Pretty printing
var indentedOptions = new JsonSerializerOptions
{
WriteIndented = true
};
string indentedJson = JsonSerializer.Serialize(person, indentedOptions);
// Camel case property names
var camelCaseOptions = new JsonSerializerOptions
{
PropertyNamingPolicy = JsonNamingPolicy.CamelCase
};
string camelCaseJson = JsonSerializer.Serialize(person, camelCaseOptions);
// Custom property naming policy
var snakeCaseOptions = new JsonSerializerOptions
{
PropertyNamingPolicy = new SnakeCaseNamingPolicy()
};
string snakeCaseJson = JsonSerializer.Serialize(person, snakeCaseOptions);
// Ignore null values
var ignoreNullOptions = new JsonSerializerOptions
{
DefaultIgnoreCondition = JsonIgnoreCondition.WhenWritingNull
};
string ignoreNullJson = JsonSerializer.Serialize(person, ignoreNullOptions);
// Custom converters
var converterOptions = new JsonSerializerOptions();
converterOptions.Converters.Add(new DateTimeConverter());
string converterJson = JsonSerializer.Serialize(person, converterOptions);
}
// Custom naming policy for snake_case
// Converts PascalCase/camelCase names to snake_case, e.g. "ZipCode" -> "zip_code",
// by inserting '_' before each interior upper-case letter and lower-casing everything.
public class SnakeCaseNamingPolicy : JsonNamingPolicy
{
    public override string ConvertName(string name)
    {
        if (string.IsNullOrEmpty(name))
            return name;
        var result = new StringBuilder();
        int position = 0;
        foreach (char c in name)
        {
            // No separator before the very first character.
            if (char.IsUpper(c) && position > 0)
            {
                result.Append('_');
            }
            result.Append(char.ToLowerInvariant(c));
            position++;
        }
        return result.ToString();
    }
}
// Custom JSON converter
// Serializes DateTime values as "yyyy-MM-dd" date strings using the invariant
// culture, and parses them back culture-independently. The original used
// DateTime.Parse/ToString with the current culture, so round-tripping could
// fail or silently change dates on machines with different regional settings.
public class DateTimeConverter : JsonConverter<DateTime>
{
    public override DateTime Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options)
    {
        // Invariant culture keeps parsing deterministic regardless of locale.
        return DateTime.Parse(reader.GetString(), CultureInfo.InvariantCulture);
    }
    public override void Write(Utf8JsonWriter writer, DateTime value, JsonSerializerOptions options)
    {
        // Time-of-day is intentionally dropped: only the date is emitted.
        writer.WriteStringValue(value.ToString("yyyy-MM-dd", CultureInfo.InvariantCulture));
    }
}
7.4.8 - Serialization and Deserialization
Serialization is the process of converting objects to a format that can be stored or transmitted, while deserialization is the reverse process.
7.4.8.1 - Binary Serialization
// Classes for binary serialization
// WARNING(review): BinaryFormatter serialization is insecure (deserializing
// untrusted data can execute arbitrary code) and was removed in .NET 9.
// Shown for legacy reference only; prefer System.Text.Json or
// DataContractSerializer in new code.
[Serializable]
public class Person
{
public int Id { get; set; }
public string Name { get; set; }
public int Age { get; set; }
public Address Address { get; set; }
// [NonSerialized] applies to fields: this value is skipped during serialization.
[NonSerialized]
private string _temporaryData;
}
// Nested type must also be [Serializable] for the containing Person to serialize.
[Serializable]
public class Address
{
public string Street { get; set; }
public string City { get; set; }
public string ZipCode { get; set; }
}
// Binary serialization
// Writes the Person object graph to filePath in BinaryFormatter's format.
// WARNING(review): BinaryFormatter is obsolete and was removed in .NET 9;
// do not use it in new code (see the note on the Person class above... prefer
// System.Text.Json or DataContractSerializer).
public void SerializeToBinary(Person person, string filePath)
{
using (FileStream fs = new FileStream(filePath, FileMode.Create))
{
BinaryFormatter formatter = new BinaryFormatter();
formatter.Serialize(fs, person);
}
}
// Binary deserialization
// Reads a Person back from a BinaryFormatter-produced file.
// WARNING(review): never call BinaryFormatter.Deserialize on untrusted input --
// it is a known remote-code-execution vector, and the API was removed in .NET 9.
public Person DeserializeFromBinary(string filePath)
{
using (FileStream fs = new FileStream(filePath, FileMode.Open))
{
BinaryFormatter formatter = new BinaryFormatter();
return (Person)formatter.Deserialize(fs);
}
}
7.4.8.2 - Custom Serialization
// Implementing ISerializable for custom serialization
// Gives the type full control over which values are stored and under what keys.
// The keys used in GetObjectData must exactly match those read back in the
// deserialization constructor ("Id", "Name", "DOB").
[Serializable]
public class CustomPerson : ISerializable
{
public int Id { get; set; }
public string Name { get; set; }
public DateTime DateOfBirth { get; set; }
// Calculated property
// Recomputed on every access from DateTime.Now; 365.25 approximates leap years,
// so the result can be off by a day near birthdays.
public int Age => (int)((DateTime.Now - DateOfBirth).TotalDays / 365.25);
// Default constructor
public CustomPerson()
{
}
// Constructor for deserialization
// Invoked by the formatter; protected so only the serialization machinery
// and subclasses can use it.
protected CustomPerson(SerializationInfo info, StreamingContext context)
{
Id = info.GetInt32("Id");
Name = info.GetString("Name");
DateOfBirth = info.GetDateTime("DOB");
}
// Method for serialization
// Called by the formatter to collect the values to persist.
public void GetObjectData(SerializationInfo info, StreamingContext context)
{
info.AddValue("Id", Id);
info.AddValue("Name", Name);
info.AddValue("DOB", DateOfBirth);
// Age is not serialized as it's calculated
}
}
// Using custom serialization
// Round-trips a CustomPerson through a file, exercising GetObjectData on write
// and the protected deserialization constructor on read.
// WARNING(review): uses BinaryFormatter, which is insecure and removed in .NET 9;
// shown only to illustrate the ISerializable contract.
public void CustomSerializationExample()
{
CustomPerson person = new CustomPerson
{
Id = 1,
Name = "John Doe",
DateOfBirth = new DateTime(1990, 1, 1)
};
using (FileStream fs = new FileStream("custom_person.bin", FileMode.Create))
{
BinaryFormatter formatter = new BinaryFormatter();
formatter.Serialize(fs, person);
}
using (FileStream fs = new FileStream("custom_person.bin", FileMode.Open))
{
BinaryFormatter formatter = new BinaryFormatter();
CustomPerson deserializedPerson = (CustomPerson)formatter.Deserialize(fs);
Console.WriteLine($"Name: {deserializedPerson.Name}, Age: {deserializedPerson.Age}");
}
}
7.4.8.3 - DataContractSerializer
// Classes for DataContractSerializer
// Opt-in model: only members marked [DataMember] are serialized.
[DataContract]
public class Employee
{
[DataMember]
public int Id { get; set; }
// Serialized under the name "FullName" instead of "Name".
[DataMember(Name = "FullName")]
public string Name { get; set; }
// Order controls element position in the output XML.
[DataMember(Order = 1)]
public int Age { get; set; }
// Deserialization fails if this member is absent from the input.
[DataMember(IsRequired = true)]
public string Department { get; set; }
// Omitted from the output when null (the default for decimal?).
[DataMember(EmitDefaultValue = false)]
public decimal? Bonus { get; set; }
// Not serialized
public string TemporaryNote { get; set; }
}
// Serialize with DataContractSerializer
// Writes the employee to filePath as XML according to its [DataContract] mapping.
public void SerializeWithDataContract(Employee employee, string filePath)
{
    var serializer = new DataContractSerializer(typeof(Employee));
    using (var stream = new FileStream(filePath, FileMode.Create))
    {
        serializer.WriteObject(stream, employee);
    }
}
// Deserialize with DataContractSerializer
// Reads filePath and reconstructs the Employee it contains.
public Employee DeserializeWithDataContract(string filePath)
{
    var serializer = new DataContractSerializer(typeof(Employee));
    using (var stream = new FileStream(filePath, FileMode.Open))
    {
        return (Employee)serializer.ReadObject(stream);
    }
}
7.4.9 - Data Compression
Data compression reduces the size of data for storage or transmission.
7.4.9.1 - GZip Compression
// Compress data using GZip
// Returns the GZip-compressed form of the given byte array.
public byte[] CompressData(byte[] data)
{
    using (var destination = new MemoryStream())
    {
        // The GZipStream must be disposed before reading the buffer so that
        // it flushes its final compressed block into the underlying stream.
        using (var compressor = new GZipStream(destination, CompressionLevel.Optimal))
        {
            compressor.Write(data, 0, data.Length);
        }
        return destination.ToArray();
    }
}
// Decompress GZip data
// Inflates a GZip-compressed byte array back into its original bytes.
public byte[] DecompressData(byte[] compressedData)
{
    using (var source = new MemoryStream(compressedData))
    using (var decompressor = new GZipStream(source, CompressionMode.Decompress))
    using (var result = new MemoryStream())
    {
        decompressor.CopyTo(result);
        return result.ToArray();
    }
}
// Compress a file
// Streams inputPath through a GZip compressor into outputPath,
// never loading the whole file into memory.
public void CompressFile(string inputPath, string outputPath)
{
    using (var source = new FileStream(inputPath, FileMode.Open))
    using (var destination = new FileStream(outputPath, FileMode.Create))
    using (var compressor = new GZipStream(destination, CompressionLevel.Optimal))
    {
        source.CopyTo(compressor);
    }
}
// Decompress a file
// Streams the GZip file at compressedPath into its decompressed form at outputPath.
public void DecompressFile(string compressedPath, string outputPath)
{
    using (var source = new FileStream(compressedPath, FileMode.Open))
    using (var decompressor = new GZipStream(source, CompressionMode.Decompress))
    using (var destination = new FileStream(outputPath, FileMode.Create))
    {
        decompressor.CopyTo(destination);
    }
}
7.4.9.2 - Deflate and Brotli Compression
// Compress with Deflate
// Returns the raw-Deflate-compressed form of the given bytes
// (same algorithm as GZip but without the GZip header/trailer).
public byte[] CompressWithDeflate(byte[] data)
{
    using (var destination = new MemoryStream())
    {
        // Dispose the DeflateStream first so its final block is flushed.
        using (var compressor = new DeflateStream(destination, CompressionLevel.Optimal))
        {
            compressor.Write(data, 0, data.Length);
        }
        return destination.ToArray();
    }
}
// Decompress Deflate data
// Inflates raw-Deflate-compressed bytes back into the original data.
public byte[] DecompressDeflate(byte[] compressedData)
{
    using (var source = new MemoryStream(compressedData))
    using (var decompressor = new DeflateStream(source, CompressionMode.Decompress))
    using (var result = new MemoryStream())
    {
        decompressor.CopyTo(result);
        return result.ToArray();
    }
}
// Compress with Brotli (available in .NET Core 2.1+)
// Returns the Brotli-compressed form of the given bytes.
public byte[] CompressWithBrotli(byte[] data)
{
    using (var destination = new MemoryStream())
    {
        // Dispose the BrotliStream first so its final block is flushed.
        using (var compressor = new BrotliStream(destination, CompressionLevel.Optimal))
        {
            compressor.Write(data, 0, data.Length);
        }
        return destination.ToArray();
    }
}
// Expands Brotli-compressed bytes back to the original data.
public byte[] DecompressBrotli(byte[] compressedData)
{
    using (var source = new MemoryStream(compressedData))
    using (var decompressor = new BrotliStream(source, CompressionMode.Decompress))
    using (var result = new MemoryStream())
    {
        decompressor.CopyTo(result);
        return result.ToArray();
    }
}
7.4.9.3 - ZipArchive for Multiple Files
// Creates a new zip archive at outputPath containing the given
// entry-name -> content pairs.
public void CreateZipArchive(string outputPath, Dictionary<string, byte[]> files)
{
    using var zipStream = new FileStream(outputPath, FileMode.Create);
    using var archive = new ZipArchive(zipStream, ZipArchiveMode.Create);
    foreach (var (entryName, content) in files)
    {
        var entry = archive.CreateEntry(entryName, CompressionLevel.Optimal);
        using var writer = entry.Open();
        writer.Write(content, 0, content.Length);
    }
}
// Reads every entry of the zip archive at zipPath into memory,
// keyed by the entry's full name (including any folder path).
public Dictionary<string, byte[]> ExtractZipArchive(string zipPath)
{
    var extracted = new Dictionary<string, byte[]>();
    using (var zipStream = new FileStream(zipPath, FileMode.Open))
    using (var archive = new ZipArchive(zipStream, ZipArchiveMode.Read))
    {
        foreach (var entry in archive.Entries)
        {
            using var content = entry.Open();
            using var buffer = new MemoryStream();
            content.CopyTo(buffer);
            extracted[entry.FullName] = buffer.ToArray();
        }
    }
    return extracted;
}
// Appends entries to an EXISTING zip archive (the file must already exist).
// Note: CreateEntry does not replace entries with the same name; it adds
// a duplicate entry.
public void AddFilesToZip(string zipPath, Dictionary<string, byte[]> filesToAdd)
{
    using var zipStream = new FileStream(zipPath, FileMode.Open);
    using var archive = new ZipArchive(zipStream, ZipArchiveMode.Update);
    foreach (var (entryName, content) in filesToAdd)
    {
        var entry = archive.CreateEntry(entryName, CompressionLevel.Optimal);
        using var writer = entry.Open();
        writer.Write(content, 0, content.Length);
    }
}
7.4.10 - Working with Large Datasets
Efficiently processing large datasets requires special techniques to manage memory and performance.
7.4.10.1 - Streaming Data Processing
// Processes a large CSV file line by line without loading it into memory.
// The first line is treated as the header; each remaining line is split
// and handed to ProcessCsvLine.
// NOTE(review): Split(',') does not handle quoted fields containing commas —
// acceptable for simple CSVs only.
public void ProcessLargeCsvFile(string filePath)
{
    using (StreamReader reader = new StreamReader(filePath))
    {
        // Read the header; a completely empty file yields null here, and
        // the original code would have thrown NullReferenceException on Split.
        string header = reader.ReadLine();
        if (header == null)
        {
            return; // empty file: nothing to process
        }
        string[] columns = header.Split(',');
        // Stream the remaining lines one at a time.
        string line;
        while ((line = reader.ReadLine()) != null)
        {
            string[] values = line.Split(',');
            ProcessCsvLine(columns, values);
        }
    }
}
// Prints each column/value pair of one CSV row. Iterates only over the
// pairs present in both arrays, so ragged rows never cause an index error.
private void ProcessCsvLine(string[] columns, string[] values)
{
    int pairCount = Math.Min(columns.Length, values.Length);
    for (int i = 0; i < pairCount; i++)
    {
        Console.WriteLine($"{columns[i]}: {values[i]}");
    }
}
// Streams a large XML file, materializing only one <Item> element at a time.
public void StreamLargeXml(string filePath)
{
    using (XmlReader reader = XmlReader.Create(filePath))
    {
        reader.MoveToContent();
        while (!reader.EOF)
        {
            if (reader.NodeType == XmlNodeType.Element && reader.Name == "Item")
            {
                // XElement.ReadFrom consumes the element and leaves the reader
                // positioned on the node that FOLLOWS it. The original loop then
                // called reader.Read() again, which skipped any <Item> that
                // immediately followed another <Item>. Only advance manually
                // when we did not consume a subtree.
                if (XElement.ReadFrom(reader) is XElement item)
                {
                    ProcessXmlItem(item);
                }
            }
            else
            {
                reader.Read();
            }
        }
    }
}
// Prints the Id attribute and Name child of one <Item> element.
// Either may be absent; the null-conditional leaves missing values blank.
private void ProcessXmlItem(XElement item)
{
    string id = item.Attribute("Id")?.Value;
    string name = item.Element("Name")?.Value;
    Console.WriteLine($"ID: {id}");
    Console.WriteLine($"Name: {name}");
}
// Iterates a JSON file whose root is an array of items.
// NOTE: JsonDocument.Parse reads and indexes the WHOLE document, so this is
// stream-fed rather than truly incremental; for constant-memory parsing of
// very large files, Utf8JsonReader would be needed instead.
public void StreamLargeJson(string filePath)
{
    using var stream = File.OpenRead(filePath);
    using var document = JsonDocument.Parse(stream);
    foreach (JsonElement element in document.RootElement.EnumerateArray())
    {
        ProcessJsonItem(element);
    }
}
// Prints the optional "id" (int) and "name" (string) properties of one
// JSON object; absent properties are simply skipped.
private void ProcessJsonItem(JsonElement item)
{
    if (item.TryGetProperty("id", out var idElement))
    {
        Console.WriteLine($"ID: {idElement.GetInt32()}");
    }
    if (item.TryGetProperty("name", out var nameElement))
    {
        Console.WriteLine($"Name: {nameElement.GetString()}");
    }
}
7.4.10.2 - Chunking and Parallel Processing
// Reads a file sequentially in fixed-size chunks and hands each chunk to
// ProcessDataChunk. The chunkSize parameter controls the buffer size;
// in the original code it was accepted but silently ignored in favor of a
// hard-coded 4096-byte buffer.
public void ProcessLargeFileInChunks(string filePath, int chunkSize)
{
    const int defaultChunkSize = 4096;
    // Guard against non-positive sizes rather than throwing, preserving
    // the old behavior (4 KB reads) for callers that passed 0 or less.
    byte[] buffer = new byte[chunkSize > 0 ? chunkSize : defaultChunkSize];
    using (FileStream fs = new FileStream(filePath, FileMode.Open))
    {
        int bytesRead;
        // The final chunk may be shorter than the buffer; bytesRead says
        // how many bytes are valid.
        while ((bytesRead = fs.Read(buffer, 0, buffer.Length)) > 0)
        {
            ProcessDataChunk(buffer, bytesRead);
        }
    }
}
// Stand-in for real per-chunk work; only reports how many bytes of the
// buffer are valid (the buffer may be larger than 'length' on the last read).
private void ProcessDataChunk(byte[] data, int length)
{
    string message = $"Processing {length} bytes";
    Console.WriteLine(message);
}
// Runs processItem over every element of items in parallel.
// degreeOfParallelism <= 0 means "one worker per logical processor".
public void ProcessLargeDataInParallel<T>(IEnumerable<T> items, Action<T> processItem, int degreeOfParallelism = -1)
{
    int workers = degreeOfParallelism > 0
        ? degreeOfParallelism
        : Environment.ProcessorCount;
    var options = new ParallelOptions { MaxDegreeOfParallelism = workers };
    Parallel.ForEach(items, options, item => processItem(item));
}
// Demonstrates ProcessLargeDataInParallel on a synthetic workload of one
// million integers.
public void ParallelProcessingExample()
{
    var largeDataset = Enumerable.Range(1, 1000000).ToList();
    ProcessLargeDataInParallel(largeDataset, value =>
    {
        // Simulated CPU-bound work; the result is intentionally discarded.
        _ = Math.Sqrt(value);
    });
}
7.4.10.3 - Memory-Efficient Data Structures
// Demonstrates random access and chunked streaming over a memory-mapped file.
// Assumes the file at filePath is at least 1024 bytes long (the view below
// would otherwise throw) — TODO confirm with callers.
public void MemoryMappedFileExample(string filePath)
{
    using (MemoryMappedFile mmf = MemoryMappedFile.CreateFromFile(filePath))
    {
        // Random access over the first 1 KB of the file.
        using (MemoryMappedViewAccessor accessor = mmf.CreateViewAccessor(0, 1024))
        {
            // Read primitive values at fixed offsets.
            byte value = accessor.ReadByte(0);
            int intValue = accessor.ReadInt32(4);
            // Write primitive values back.
            accessor.Write(8, 42);
            // Fixed: MemoryMappedViewAccessor has no Write overload taking a
            // ReadOnlySpan<byte>, so accessor.Write(12, "Hello"u8) does not
            // compile (ref structs cannot be generic Write<T> arguments).
            // Write the raw UTF-8 bytes with WriteArray instead.
            byte[] text = "Hello"u8.ToArray();
            accessor.WriteArray(12, text, 0, text.Length);
        }
        // Stream the whole file through fixed-size view windows.
        long fileSize = new FileInfo(filePath).Length;
        long chunkSize = 1024 * 1024; // 1 MB chunks
        for (long position = 0; position < fileSize; position += chunkSize)
        {
            // The last window may be smaller than chunkSize.
            long size = Math.Min(chunkSize, fileSize - position);
            using (MemoryMappedViewStream stream = mmf.CreateViewStream(position, size))
            {
                ProcessStreamChunk(stream);
            }
        }
    }
}
// Drains the stream in 4 KB reads; real per-buffer work would go in the loop.
private void ProcessStreamChunk(Stream stream)
{
    var scratch = new byte[4096];
    for (int read = stream.Read(scratch, 0, scratch.Length);
         read > 0;
         read = stream.Read(scratch, 0, scratch.Length))
    {
        // Process the first 'read' bytes of scratch here.
    }
}
// Demonstrates a dictionary-backed sparse array: only populated indices are
// stored, which is far cheaper than a dense array for mostly-empty data.
public void SparseCollectionExample()
{
    var sparseArray = new Dictionary<long, double>();
    // Only these two indices consume memory.
    sparseArray[1000000] = 42.0;
    sparseArray[5000000] = 17.5;
    // Indices that were never set read back as the implicit default 0.0.
    double value = sparseArray.TryGetValue(1000000, out double result) ? result : 0.0;
    // Enumeration visits only the populated entries.
    foreach (var (index, stored) in sparseArray)
    {
        Console.WriteLine($"Index: {index}, Value: {stored}");
    }
}
7.4.10.4 - Database Techniques for Large Data
// Streams Customer rows from the database one at a time. Because this is an
// iterator, the connection and reader stay open only while the caller is
// enumerating, and only one row is materialized at a time.
public IEnumerable<Customer> GetCustomersStreaming(string connectionString)
{
    using (var connection = new SqlConnection(connectionString))
    {
        connection.Open();
        using (var command = new SqlCommand("SELECT * FROM Customers", connection))
        using (var reader = command.ExecuteReader())
        {
            while (reader.Read())
            {
                // Columns are read by ordinal: 0 = Id, 1 = Name, 2 = Email —
                // assumes the Customers table lays them out in that order;
                // verify against the schema.
                yield return new Customer
                {
                    Id = reader.GetInt32(0),
                    Name = reader.GetString(1),
                    Email = reader.GetString(2)
                };
            }
        }
    }
}
// Example: consumes the streaming iterator so customers are processed one at
// a time instead of being loaded into memory all at once.
public void ProcessLargeDatabaseResults()
{
    string cs = "Server=.;Database=MyDb;Trusted_Connection=True;";
    foreach (Customer current in GetCustomersStreaming(cs))
    {
        ProcessCustomer(current);
    }
}
// Stand-in for real per-customer work; just logs the customer's name.
private void ProcessCustomer(Customer customer)
{
    string name = customer.Name;
    Console.WriteLine($"Processing customer: {name}");
}
// Inserts customers in batches of 1000 inside a single transaction.
// All rows commit together; any failure rolls everything back.
public void BatchInsertCustomers(List<Customer> customers, string connectionString)
{
    const int batchSize = 1000;
    using (SqlConnection connection = new SqlConnection(connectionString))
    {
        connection.Open();
        using (SqlTransaction transaction = connection.BeginTransaction())
        {
            try
            {
                for (int i = 0; i < customers.Count; i += batchSize)
                {
                    // GetRange copies the slice directly; the original
                    // Skip(i).Take(batchSize) rescanned the list from the
                    // start on every batch (O(n^2) across all batches).
                    var batch = customers.GetRange(i, Math.Min(batchSize, customers.Count - i));
                    using (SqlCommand command = connection.CreateCommand())
                    {
                        command.Transaction = transaction;
                        command.CommandText = "INSERT INTO Customers (Name, Email) VALUES (@Name, @Email)";
                        // Create the parameters once per batch and reuse them
                        // for every row — only their values change.
                        SqlParameter nameParam = command.Parameters.Add("@Name", SqlDbType.NVarChar, 100);
                        SqlParameter emailParam = command.Parameters.Add("@Email", SqlDbType.NVarChar, 100);
                        foreach (var customer in batch)
                        {
                            nameParam.Value = customer.Name;
                            emailParam.Value = customer.Email;
                            command.ExecuteNonQuery();
                        }
                    }
                }
                // All batches succeeded: make the inserts permanent.
                transaction.Commit();
            }
            catch
            {
                // Undo every batch, then rethrow with 'throw;' so the
                // original stack trace is preserved.
                transaction.Rollback();
                throw;
            }
        }
    }
}
Summary
Advanced data handling and manipulation in C# involves mastering a variety of techniques, from LINQ queries to efficient processing of XML and JSON data. By understanding serialization, compression, and strategies for working with large datasets, you can build applications that effectively manage data of any size or complexity.