Add Transformation model
This commit is contained in:
@@ -30,6 +30,11 @@ scraper:
|
|||||||
steps:
|
steps:
|
||||||
- type: "xpath"
|
- type: "xpath"
|
||||||
value: "//li/text()"
|
value: "//li/text()"
|
||||||
|
transform:
|
||||||
|
- name: "replace"
|
||||||
|
parameters:
|
||||||
|
- " ("
|
||||||
|
- ""
|
||||||
language:
|
language:
|
||||||
steps:
|
steps:
|
||||||
- type: "xpath"
|
- type: "xpath"
|
||||||
|
|||||||
@@ -1,38 +0,0 @@
|
|||||||
package com.rak.model.transform;
|
|
||||||
|
|
||||||
import jakarta.enterprise.context.ApplicationScoped;
|
|
||||||
|
|
||||||
import java.time.LocalDate;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
import java.util.regex.Matcher;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
@ApplicationScoped
|
|
||||||
public class TestRegistry {
|
|
||||||
private final Map<String, Transformation> registry = new ConcurrentHashMap<>();
|
|
||||||
|
|
||||||
public TestRegistry() {
|
|
||||||
// Register built-in transformations
|
|
||||||
register("trim", string -> string.trim());
|
|
||||||
register("upper", String::toUpperCase);
|
|
||||||
register("parseInt", s -> Integer.parseInt((String) s));
|
|
||||||
register("parseFloat", s -> Float.parseFloat((String) s));
|
|
||||||
register("parseDate", s -> LocalDate.parse((String) s));
|
|
||||||
register("extract", this::extract);
|
|
||||||
}
|
|
||||||
|
|
||||||
public void register(String name, Transformation transformation) {
|
|
||||||
registry.put(name, transformation);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Transformation get(String name) {
|
|
||||||
return registry.get(name);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Object extract(Object input, String pattern) {
|
|
||||||
Pattern regex = Pattern.compile(pattern);
|
|
||||||
Matcher matcher = regex.matcher((String) input);
|
|
||||||
return matcher.find() ? matcher.group(1) : input;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
package com.rak.config.converter
|
package com.rak.config
|
||||||
|
|
||||||
import com.rak.config.Step
|
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
interface AbstractModelDefinition {
|
interface AbstractModelDefinition {
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
package com.rak.config
|
package com.rak.config
|
||||||
|
|
||||||
import com.rak.config.converter.AbstractModelDefinition
|
|
||||||
import io.smallrye.config.WithName
|
import io.smallrye.config.WithName
|
||||||
|
|
||||||
interface RegionalSetDefinition : AbstractModelDefinition {
|
interface RegionalSetDefinition : AbstractModelDefinition {
|
||||||
|
|||||||
@@ -1,5 +1,8 @@
|
|||||||
package com.rak.config
|
package com.rak.config
|
||||||
|
|
||||||
|
import java.util.*
|
||||||
|
|
||||||
interface SelectorDefinition {
|
interface SelectorDefinition {
|
||||||
fun steps(): Set<Step>
|
fun steps(): Set<Step>
|
||||||
|
fun transform(): Optional<Set<TransformationStep>>
|
||||||
}
|
}
|
||||||
10
src/main/kotlin/com/rak/config/TransformationStep.kt
Normal file
10
src/main/kotlin/com/rak/config/TransformationStep.kt
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
package com.rak.config
|
||||||
|
|
||||||
|
import com.rak.config.converter.EmptyStringConverter
|
||||||
|
import io.smallrye.config.WithConverter
|
||||||
|
|
||||||
|
interface TransformationStep {
|
||||||
|
fun name(): String
|
||||||
|
@WithConverter(EmptyStringConverter::class)
|
||||||
|
fun parameters(): MutableList<String?>
|
||||||
|
}
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
package com.rak.config.converter
|
||||||
|
|
||||||
|
import jakarta.annotation.Priority
|
||||||
|
import org.eclipse.microprofile.config.spi.Converter
|
||||||
|
|
||||||
|
@Priority(1)
|
||||||
|
class EmptyStringConverter : Converter<String> {
|
||||||
|
override fun convert(value: String): String {
|
||||||
|
return value
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -2,5 +2,5 @@ package com.rak.model.transform
|
|||||||
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
fun interface ParameterizedTransformation : AbstractTransformation {
|
fun interface ParameterizedTransformation : AbstractTransformation {
|
||||||
fun apply(input: Any, vararg parameters: Any): Any?
|
fun apply(input: String, vararg parameters: String): Any?
|
||||||
}
|
}
|
||||||
@@ -2,5 +2,5 @@ package com.rak.model.transform
|
|||||||
|
|
||||||
@FunctionalInterface
|
@FunctionalInterface
|
||||||
fun interface Transformation : AbstractTransformation {
|
fun interface Transformation : AbstractTransformation {
|
||||||
fun apply(input: Any): Any?
|
fun apply(input: String): String?
|
||||||
}
|
}
|
||||||
@@ -1,27 +1,56 @@
|
|||||||
package com.rak.model.transform
|
package com.rak.model.transform
|
||||||
|
|
||||||
import jakarta.enterprise.context.ApplicationScoped
|
import java.util.concurrent.ConcurrentHashMap
|
||||||
|
|
||||||
@ApplicationScoped
|
|
||||||
class TransformationRegistry {
|
class TransformationRegistry {
|
||||||
|
|
||||||
private val transformations = hashMapOf<String, (input: Any) -> Any>()
|
private val transformations: ConcurrentHashMap<String, Transformation> = ConcurrentHashMap()
|
||||||
|
private val parameterizedTransformation: ConcurrentHashMap<String, ParameterizedTransformation> =
|
||||||
|
ConcurrentHashMap()
|
||||||
|
|
||||||
init {
|
init {
|
||||||
register<String>("trim") {
|
register("trim") { it.trim() }
|
||||||
(it as String).trim()
|
register("replace") { input, parameters ->
|
||||||
|
require(parameters.size == 2) {
|
||||||
|
"'replace' requires exactly 2 parameters"
|
||||||
|
}
|
||||||
|
input.replace(parameters[0], parameters[1])
|
||||||
|
}
|
||||||
|
register("regexReplace") { input, params ->
|
||||||
|
require(params.size == 2) {
|
||||||
|
"'regexReplace' requires exactly 2 parameters"
|
||||||
|
}
|
||||||
|
input.replace(params[0].toRegex(), params[1])
|
||||||
}
|
}
|
||||||
register<String>("replace", { s: Any ->
|
|
||||||
Integer.
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// fun <T : Any> register(name: String, transformation: (input: Any) -> T) {
|
|
||||||
// transformations[name] = transformation
|
|
||||||
// }
|
|
||||||
|
|
||||||
fun <T : Any> register(name: String, transformation: Transformation) {
|
fun register(name: String, transformation: Transformation) {
|
||||||
|
transformations.put(name, transformation)
|
||||||
|
}
|
||||||
|
|
||||||
|
fun register(name: String, transformation: ParameterizedTransformation) {
|
||||||
|
parameterizedTransformation.put(name, transformation)
|
||||||
|
}
|
||||||
|
|
||||||
|
fun getTransformation(name: String, parameters: List<String>? = null): AbstractTransformation {
|
||||||
|
return when {
|
||||||
|
transformations.containsKey(name) -> {
|
||||||
|
if (!parameters.isNullOrEmpty()) {
|
||||||
|
throw IllegalArgumentException("'$name' doesn't accept parameters")
|
||||||
|
} else {
|
||||||
|
transformations[name]!!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parameterizedTransformation.containsKey(name) -> {
|
||||||
|
if (parameters.isNullOrEmpty()) {
|
||||||
|
throw IllegalArgumentException("'$name' requires parameters")
|
||||||
|
} else {
|
||||||
|
parameterizedTransformation[name]!!
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else -> throw IllegalArgumentException("Unknown transformation: '$name'")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@@ -26,8 +26,8 @@ class XPathUtil private constructor() {
|
|||||||
.firstOrNull()?.text()
|
.firstOrNull()?.text()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun getNextElement(root: Element, path: String): Element? {
|
fun getNextElement(element: Element, path: String): Element? {
|
||||||
return root.selectXpath(path).firstOrNull()
|
return element.selectXpath(path).firstOrNull()
|
||||||
}
|
}
|
||||||
|
|
||||||
fun extractResult(root: Element, path: String): String? {
|
fun extractResult(root: Element, path: String): String? {
|
||||||
|
|||||||
@@ -1 +1 @@
|
|||||||
com.rak.config.converter.TypeSelectorConverter
|
com.rak.config.converter.TypeSelectorConverter
|
||||||
|
|||||||
@@ -36,6 +36,11 @@ scraper:
|
|||||||
steps:
|
steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
value: "//li/text()"
|
value: "//li/text()"
|
||||||
|
transform:
|
||||||
|
- name: "replace"
|
||||||
|
parameters:
|
||||||
|
- " ("
|
||||||
|
- ""
|
||||||
language:
|
language:
|
||||||
steps:
|
steps:
|
||||||
- type: xpath
|
- type: xpath
|
||||||
|
|||||||
Reference in New Issue
Block a user