Add Transformation model

This commit is contained in:
2025-06-24 15:23:12 +02:00
parent 9db3753105
commit 8cc9a64111
13 changed files with 81 additions and 58 deletions

View File

@@ -30,6 +30,11 @@ scraper:
steps:
- type: "xpath"
value: "//li/text()"
transform:
- name: "replace"
parameters:
- " ("
- ""
language:
steps:
- type: "xpath"

View File

@@ -1,38 +0,0 @@
package com.rak.model.transform;
import jakarta.enterprise.context.ApplicationScoped;
import java.time.LocalDate;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Application-scoped registry that maps a transformation name to its {@link Transformation}.
 *
 * <p>Entries are kept in a {@link ConcurrentHashMap}. A set of built-in transformations is
 * registered in the constructor; further ones can be added through {@link #register}.
 */
@ApplicationScoped
public class TestRegistry {
// Name -> transformation lookup table.
private final Map<String, Transformation> registry = new ConcurrentHashMap<>();
/**
 * Creates the registry and registers the built-in transformations
 * ("trim", "upper", "parseInt", "parseFloat", "parseDate", "extract").
 */
public TestRegistry() {
// Register built-in transformations
register("trim", string -> string.trim());
register("upper", String::toUpperCase);
// The parse* entries cast their input to String before parsing; a non-String input
// fails with ClassCastException, an unparsable one with the parser's own exception.
register("parseInt", s -> Integer.parseInt((String) s));
register("parseFloat", s -> Float.parseFloat((String) s));
// LocalDate.parse(CharSequence) expects ISO-8601 local dates such as 2007-12-03.
register("parseDate", s -> LocalDate.parse((String) s));
// NOTE(review): extract takes (input, pattern) while the other entries take a single
// argument — confirm that Transformation's signature actually accepts this reference.
register("extract", this::extract);
}
/**
 * Registers {@code transformation} under {@code name}, replacing any entry already
 * stored under that name.
 *
 * @param name key used for later lookup; ConcurrentHashMap rejects null keys
 * @param transformation transformation to store; ConcurrentHashMap rejects null values
 */
public void register(String name, Transformation transformation) {
registry.put(name, transformation);
}
/**
 * Looks up a transformation by name.
 *
 * @param name name the transformation was registered under
 * @return the registered transformation, or {@code null} if nothing is registered under {@code name}
 */
public Transformation get(String name) {
return registry.get(name);
}
/**
 * Returns the first capturing group of the first match of {@code pattern} in {@code input},
 * or {@code input} itself when the pattern does not match.
 *
 * @param input value to search; cast to String
 * @param pattern regular expression, compiled on every call
 * @return capture group 1 of the first match, or the unchanged input if there is no match
 */
private Object extract(Object input, String pattern) {
Pattern regex = Pattern.compile(pattern);
Matcher matcher = regex.matcher((String) input);
// NOTE(review): group(1) throws IndexOutOfBoundsException when the pattern has no capturing
// group, and yields null when group 1 did not take part in the match.
return matcher.find() ? matcher.group(1) : input;
}
}

View File

@@ -1,6 +1,5 @@
package com.rak.config.converter
package com.rak.config
import com.rak.config.Step
import io.smallrye.config.WithName
interface AbstractModelDefinition {

View File

@@ -1,6 +1,5 @@
package com.rak.config
import com.rak.config.converter.AbstractModelDefinition
import io.smallrye.config.WithName
interface RegionalSetDefinition : AbstractModelDefinition {

View File

@@ -1,5 +1,8 @@
package com.rak.config
import java.util.*
/**
 * Describes how a single value is selected: the [steps] to run and an optional list of
 * transformations to apply afterwards.
 *
 * Presumably bound from the `scraper` YAML configuration (`steps:` / `transform:` keys) —
 * TODO confirm against the config mapping setup.
 */
interface SelectorDefinition {
/** Selection steps for this selector. */
// NOTE(review): `Set` does not promise iteration order by contract; if steps must run in the
// order they are declared, confirm the concrete set implementation preserves it.
fun steps(): Set<Step>
/** Transformation steps for the selected value; empty [Optional] when none are configured. */
// NOTE(review): same ordering caveat as `steps()` — chained transformations are order-sensitive.
fun transform(): Optional<Set<TransformationStep>>
}

View File

@@ -0,0 +1,10 @@
package com.rak.config
import com.rak.config.converter.EmptyStringConverter
import io.smallrye.config.WithConverter
/**
 * One entry of a selector's `transform` list: the name of a transformation and the
 * parameters to pass to it.
 */
interface TransformationStep {
/** Name of the transformation (e.g. "replace" in the sample configuration). */
fun name(): String
/**
 * Parameters for the transformation, converted with [EmptyStringConverter].
 *
 * Presumably the converter is there so that empty-string parameters (such as the `""`
 * replacement value in the sample configuration) are kept as given — TODO confirm.
 */
@WithConverter(EmptyStringConverter::class)
fun parameters(): MutableList<String?>
}

View File

@@ -0,0 +1,11 @@
package com.rak.config.converter
import jakarta.annotation.Priority
import org.eclipse.microprofile.config.spi.Converter
/**
 * Pass-through [Converter] for `String` values: whatever text it is handed is returned as is.
 */
@Priority(1)
class EmptyStringConverter : Converter<String> {
    /** Returns [value] unchanged. */
    override fun convert(value: String): String = value
}

View File

@@ -2,5 +2,5 @@ package com.rak.model.transform
/**
 * A transformation that receives an input value plus a variable number of extra parameters.
 */
@FunctionalInterface
fun interface ParameterizedTransformation : AbstractTransformation {
// NOTE(review): two `apply` declarations are listed here, but a `fun interface` may declare
// only one abstract member. The `Any`-typed one looks like the pre-change signature —
// confirm that only the `String`-typed one is live.
fun apply(input: Any, vararg parameters: Any): Any?
/** Applies the transformation to [input] using [parameters]; the result may be `null`. */
fun apply(input: String, vararg parameters: String): Any?
}

View File

@@ -2,5 +2,5 @@ package com.rak.model.transform
/**
 * A transformation that receives only an input value and takes no extra parameters.
 */
@FunctionalInterface
fun interface Transformation : AbstractTransformation {
// NOTE(review): two `apply` declarations are listed here, but a `fun interface` may declare
// only one abstract member. The `Any`-typed one looks like the pre-change signature —
// confirm that only the `String`-typed one is live.
fun apply(input: Any): Any?
/** Applies the transformation to [input]; the result may be `null`. */
fun apply(input: String): String?
}

View File

@@ -1,27 +1,56 @@
package com.rak.model.transform
import jakarta.enterprise.context.ApplicationScoped
import java.util.concurrent.ConcurrentHashMap
@ApplicationScoped
class TransformationRegistry {
// NOTE(review): `transformations` is declared twice; this HashMap-of-lambdas form looks like the
// pre-change declaration — confirm that only the ConcurrentHashMap declaration below remains.
private val transformations = hashMapOf<String, (input: Any) -> Any>()
// Transformations that take only an input, keyed by their registered name.
private val transformations: ConcurrentHashMap<String, Transformation> = ConcurrentHashMap()
// Transformations that additionally take parameters, keyed by their registered name.
private val parameterizedTransformation: ConcurrentHashMap<String, ParameterizedTransformation> =
ConcurrentHashMap()
// Registers the built-in transformations: "trim", "replace" and "regexReplace".
init {
// NOTE(review): the next two lines open a lambda that is never closed and duplicate the
// "trim" registration that follows — they look like leftovers of the previous revision.
register<String>("trim") {
(it as String).trim()
// "trim": removes leading and trailing whitespace from the input.
register("trim") { it.trim() }
// "replace": literal replacement of every occurrence of parameters[0] with parameters[1].
register("replace") { input, parameters ->
// require(...) throws IllegalArgumentException with this message on a wrong parameter count.
require(parameters.size == 2) {
"'replace' requires exactly 2 parameters"
}
input.replace(parameters[0], parameters[1])
}
// "regexReplace": params[0] is compiled as a regular expression on every call and each match
// is replaced with params[1]. In the replacement string, `\` escapes and `$` introduces a
// group reference, so literal occurrences of either must be escaped.
register("regexReplace") { input, params ->
require(params.size == 2) {
"'regexReplace' requires exactly 2 parameters"
}
input.replace(params[0].toRegex(), params[1])
}
// NOTE(review): incomplete expression (`Integer.`) and a second "replace" registration —
// this looks like a leftover of the previous revision; confirm it is gone from the live file.
register<String>("replace", { s: Any ->
Integer.
})
}
// fun <T : Any> register(name: String, transformation: (input: Any) -> T) {
// transformations[name] = transformation
// }
fun <T : Any> register(name: String, transformation: Transformation) {
/**
 * Stores a parameterless [transformation] under [name]; an entry already registered
 * under the same name is overwritten.
 */
fun register(name: String, transformation: Transformation) {
    transformations[name] = transformation
}
/**
 * Stores a parameter-taking [transformation] under [name]; an entry already registered
 * under the same name is overwritten.
 */
fun register(name: String, transformation: ParameterizedTransformation) {
    parameterizedTransformation[name] = transformation
}
/**
 * Returns the transformation registered under [name] after checking that [parameters]
 * match the kind of transformation found.
 *
 * Parameterless transformations are consulted first, so a name registered in both maps
 * resolves to the parameterless one.
 *
 * @param name name the transformation was registered under
 * @param parameters parameters the caller intends to pass; `null` and empty both mean "none"
 * @return the matching [Transformation] or [ParameterizedTransformation]
 * @throws IllegalArgumentException if [name] is unknown, if parameters are supplied to a
 *   parameterless transformation, or if none are supplied to a parameterized one
 */
fun getTransformation(name: String, parameters: List<String>? = null): AbstractTransformation {
    val hasParameters = !parameters.isNullOrEmpty()

    // One lookup per map: avoids the containsKey + `!!` check-then-act pair, which performs
    // two reads on a concurrent map and relies on nothing changing in between.
    transformations[name]?.let { plain ->
        require(!hasParameters) { "'$name' doesn't accept parameters" }
        return plain
    }

    parameterizedTransformation[name]?.let { parameterized ->
        require(hasParameters) { "'$name' requires parameters" }
        return parameterized
    }

    throw IllegalArgumentException("Unknown transformation: '$name'")
}
}

View File

@@ -26,8 +26,8 @@ class XPathUtil private constructor() {
.firstOrNull()?.text()
}
fun getNextElement(root: Element, path: String): Element? {
return root.selectXpath(path).firstOrNull()
/**
 * Returns the first item produced by `element.selectXpath(path)`, or `null` when that
 * result is empty.
 */
fun getNextElement(element: Element, path: String): Element? =
    element.selectXpath(path).firstOrNull()
fun extractResult(root: Element, path: String): String? {

View File

@@ -36,6 +36,11 @@ scraper:
steps:
- type: xpath
value: "//li/text()"
transform:
- name: "replace"
parameters:
- " ("
- ""
language:
steps:
- type: xpath