upskill-event-manager/wordpress-dev/tests/e2e/utils/HtmlDumpParser.ts
bengizmo d6211ee364 feat(testing): Implement HVAC_Test_User_Factory and update .gitignore
- Add HVAC_Test_User_Factory class with:
  * User creation with specific roles
  * Multiple role support
  * Persona management system
  * Account cleanup integration
- Create comprehensive test suite in HVAC_Test_User_Factory_Test.php
- Update testing improvement plan documentation
- Add implementation decisions to project memory bank
- Restructure .gitignore with:
  * Whitelist approach for better file management
  * Explicit backup exclusions
  * Specific bin directory inclusions

Part of the Account Management component from the testing framework improvement plan.
2025-04-14 17:41:36 -03:00

177 lines
No EOL
5.6 KiB
TypeScript

import { JSDOM } from 'jsdom';
import fs from 'fs/promises';
import path from 'path';
/**
 * Switches controlling which parts of an HTML dump are stripped.
 *
 * Every field is optional; omitted fields fall back to the parser's defaults.
 */
export interface HtmlDumpOptions {
  /** Remove every `<script>` element. */
  removeScripts?: boolean;

  /** Remove `<style>` elements and `<link rel="stylesheet">` elements. */
  removeStyles?: boolean;

  /** Remove attributes whose name starts with `on` (e.g. `onclick`). */
  removeInlineHandlers?: boolean;

  /** Remove HTML comment nodes. */
  removeComments?: boolean;

  /** Remove attributes whose name starts with `data-`. */
  removeDataAttributes?: boolean;

  /** CSS selectors whose matching elements are kept verbatim in the output. */
  preserveSelectors?: string[];

  /** CSS selectors whose matching elements are removed from the output. */
  excludeSelectors?: string[];
}
/**
 * Counters describing what a reduction pass removed and how much the
 * markup shrank.
 */
interface ReductionStats {
  /** Length of the input HTML string. */
  originalSize: number;

  /** Number of script elements removed. */
  removedScripts: number;

  /** Number of style / stylesheet-link elements removed. */
  removedStyles: number;

  /** Number of inline event-handler attributes removed. */
  removedHandlers: number;

  /** Number of comment nodes removed. */
  removedComments: number;

  /** Number of `data-*` attributes removed. */
  removedDataAttrs: number;

  /** Length of the reduced HTML string. */
  finalSize: number;

  /** `(originalSize - finalSize) / originalSize`, expressed as a percentage. */
  reductionPercentage: number;
}
/**
 * Reduces an HTML dump by stripping scripts, styles, inline event handlers,
 * comments and `data-*` attributes, while allowing selected elements to be
 * kept verbatim (`preserveSelectors`) or dropped entirely (`excludeSelectors`).
 */
export class HtmlDumpParser {
  /**
   * Numeric value of `NodeFilter.SHOW_COMMENT` as defined by the DOM standard.
   * The `NodeFilter` global only exists on a browser/jsdom window, not in plain
   * Node, so the constant is spelled out instead of read from a global.
   */
  private static readonly SHOW_COMMENT = 0x80;

  /** Options applied when the caller does not override them: strip everything. */
  private defaultOptions: HtmlDumpOptions = {
    removeScripts: true,
    removeStyles: true,
    removeInlineHandlers: true,
    removeComments: true,
    removeDataAttributes: true,
    preserveSelectors: [],
    excludeSelectors: []
  };

  /**
   * Parses `htmlContent`, applies the enabled reductions and returns the
   * reduced markup together with removal statistics.
   *
   * The output is serialized from `document.documentElement`, so anything
   * outside the `<html>` element (for example the doctype) is not part of it.
   *
   * @param htmlContent Raw HTML to reduce.
   * @param options Overrides merged on top of the default options.
   * @returns The reduced HTML and the statistics gathered while reducing it.
   */
  async parseAndReduce(
    htmlContent: string,
    options: HtmlDumpOptions = {}
  ): Promise<{ content: string; reductionStats: ReductionStats }> {
    const opts = { ...this.defaultOptions, ...options };
    const originalSize = htmlContent.length;
    const dom = new JSDOM(htmlContent);
    const document = dom.window.document;

    const stats: ReductionStats = {
      originalSize,
      removedScripts: 0,
      removedStyles: 0,
      removedHandlers: 0,
      removedComments: 0,
      removedDataAttrs: 0,
      finalSize: 0,
      reductionPercentage: 0
    };

    // Swap preserved elements for placeholders first so that none of the
    // reductions below can touch them.
    const preservedContent = this.extractPreserved(
      document,
      htmlContent,
      opts.preserveSelectors ?? []
    );

    for (const selector of opts.excludeSelectors ?? []) {
      this.removeElements(document, selector);
    }

    if (opts.removeScripts) {
      stats.removedScripts = this.removeElements(document, 'script');
    }
    if (opts.removeStyles) {
      stats.removedStyles = this.removeElements(document, 'style, link[rel="stylesheet"]');
    }
    if (opts.removeInlineHandlers) {
      stats.removedHandlers = this.removeInlineHandlers(document);
    }
    if (opts.removeComments) {
      stats.removedComments = this.removeComments(document);
    }
    if (opts.removeDataAttributes) {
      stats.removedDataAttrs = this.removeDataAttributes(document);
    }

    // Restore in reverse insertion order: a later-preserved element may contain
    // the placeholder of an earlier one (never the other way round), so the
    // outer markup has to be put back before its inner placeholders can match.
    let reducedHtml = document.documentElement.outerHTML;
    for (const [placeholder, content] of [...preservedContent.entries()].reverse()) {
      // Function replacer: preserved markup must be inserted literally, without
      // `$&` / `$1` / `$'` being interpreted as replacement patterns.
      reducedHtml = reducedHtml.replace(placeholder, () => content);
    }

    stats.finalSize = reducedHtml.length;
    // Empty input would otherwise yield 0 / 0 = NaN.
    stats.reductionPercentage =
      originalSize === 0 ? 0 : ((originalSize - stats.finalSize) / originalSize) * 100;

    return { content: reducedHtml, reductionStats: stats };
  }

  /**
   * Reads an HTML file, reduces it and writes the result to `outputPath`,
   * creating the output directory if it does not exist.
   *
   * @param inputPath File to read (decoded as UTF-8).
   * @param outputPath File to write the reduced HTML to.
   * @param options Overrides forwarded to {@link HtmlDumpParser.parseAndReduce}.
   * @returns The statistics of the reduction.
   */
  async processFile(
    inputPath: string,
    outputPath: string,
    options: HtmlDumpOptions = {}
  ): Promise<ReductionStats> {
    const content = await fs.readFile(inputPath, 'utf8');
    const { content: reducedContent, reductionStats } = await this.parseAndReduce(content, options);
    await fs.mkdir(path.dirname(outputPath), { recursive: true });
    await fs.writeFile(outputPath, reducedContent);
    return reductionStats;
  }

  /**
   * Replaces every element matching one of `selectors` with a text placeholder
   * and returns a map from placeholder to the element's original markup.
   */
  private extractPreserved(
    document: Document,
    htmlContent: string,
    selectors: readonly string[]
  ): Map<string, string> {
    const preserved = new Map<string, string>();

    // Placeholders consist only of characters that HTML serialization never
    // escapes, so they survive the round trip through `outerHTML` unchanged
    // (a raw selector such as `div > p` would come back as `div &gt; p`).
    // Lengthen the prefix until it cannot be confused with the input's own text.
    let prefix = '__PRESERVED_';
    while (htmlContent.includes(prefix)) {
      prefix = `_${prefix}`;
    }

    for (const selector of selectors) {
      document.querySelectorAll(selector).forEach((el: Element) => {
        // Already detached because an ancestor matched earlier; its markup is
        // contained in that ancestor's preserved content.
        if (!el.isConnected) {
          return;
        }
        const placeholder = `${prefix}${preserved.size}__`;
        preserved.set(placeholder, el.outerHTML);
        // Insert a text node directly instead of assigning `outerHTML`, so the
        // placeholder is never run through the HTML parser.
        el.replaceWith(document.createTextNode(placeholder));
      });
    }

    return preserved;
  }

  /** Removes all elements matching `selector`; returns how many were removed. */
  private removeElements(document: Document, selector: string): number {
    const elements = document.querySelectorAll(selector);
    elements.forEach((el: Element) => el.remove());
    return elements.length;
  }

  /** Removes attributes whose name starts with `on`; returns how many were removed. */
  private removeInlineHandlers(document: Document): number {
    return this.removeAttributesByPrefix(document, 'on');
  }

  /** Removes all comment nodes from the document; returns how many were removed. */
  private removeComments(document: Document): number {
    const iterator = document.createNodeIterator(
      document,
      HtmlDumpParser.SHOW_COMMENT,
      null
    );

    // Collect first, then remove, so the traversal never walks a tree that is
    // being mutated underneath it.
    const comments: Node[] = [];
    let node: Node | null;
    while ((node = iterator.nextNode())) {
      comments.push(node);
    }
    for (const comment of comments) {
      comment.parentNode?.removeChild(comment);
    }
    return comments.length;
  }

  /** Removes attributes whose name starts with `data-`; returns how many were removed. */
  private removeDataAttributes(document: Document): number {
    return this.removeAttributesByPrefix(document, 'data-');
  }

  /**
   * Removes, from every element in the document, each attribute whose name
   * starts with `prefix`; returns the number of attributes removed.
   */
  private removeAttributesByPrefix(document: Document, prefix: string): number {
    let count = 0;
    document.querySelectorAll('*').forEach((el: Element) => {
      // Snapshot the live attribute list before mutating it.
      for (const attr of Array.from(el.attributes)) {
        if (attr.name.startsWith(prefix)) {
          el.removeAttribute(attr.name);
          count++;
        }
      }
    });
    return count;
  }
}