diff --git a/sources.yml b/sources.yml index 45ba8b3..083c79d 100644 --- a/sources.yml +++ b/sources.yml @@ -30,6 +30,11 @@ scraper: steps: - type: "xpath" value: "//li/text()" + transform: + - name: "replace" + parameters: + - " (" + - "" language: steps: - type: "xpath" diff --git a/src/main/java/com/rak/model/transform/TestRegistry.java b/src/main/java/com/rak/model/transform/TestRegistry.java deleted file mode 100644 index 86c0104..0000000 --- a/src/main/java/com/rak/model/transform/TestRegistry.java +++ /dev/null @@ -1,38 +0,0 @@ -package com.rak.model.transform; - -import jakarta.enterprise.context.ApplicationScoped; - -import java.time.LocalDate; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -@ApplicationScoped -public class TestRegistry { - private final Map registry = new ConcurrentHashMap<>(); - - public TestRegistry() { - // Register built-in transformations - register("trim", string -> string.trim()); - register("upper", String::toUpperCase); - register("parseInt", s -> Integer.parseInt((String) s)); - register("parseFloat", s -> Float.parseFloat((String) s)); - register("parseDate", s -> LocalDate.parse((String) s)); - register("extract", this::extract); - } - - public void register(String name, Transformation transformation) { - registry.put(name, transformation); - } - - public Transformation get(String name) { - return registry.get(name); - } - - private Object extract(Object input, String pattern) { - Pattern regex = Pattern.compile(pattern); - Matcher matcher = regex.matcher((String) input); - return matcher.find() ? matcher.group(1) : input; - } -} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/converter/AbstractModelDefinition.kt b/src/main/kotlin/com/rak/config/AbstractModelDefinition.kt similarity index 67% rename from src/main/kotlin/com/rak/config/converter/AbstractModelDefinition.kt rename to src/main/kotlin/com/rak/config/AbstractModelDefinition.kt index 77bcc6d..45c93ed 100644 --- a/src/main/kotlin/com/rak/config/converter/AbstractModelDefinition.kt +++ b/src/main/kotlin/com/rak/config/AbstractModelDefinition.kt @@ -1,6 +1,5 @@ -package com.rak.config.converter +package com.rak.config -import com.rak.config.Step import io.smallrye.config.WithName interface AbstractModelDefinition { diff --git a/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt b/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt index 3283721..f5c7b92 100644 --- a/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt +++ b/src/main/kotlin/com/rak/config/RegionalSetDefinition.kt @@ -1,6 +1,5 @@ package com.rak.config -import com.rak.config.converter.AbstractModelDefinition import io.smallrye.config.WithName interface RegionalSetDefinition : AbstractModelDefinition { diff --git a/src/main/kotlin/com/rak/config/SelectorDefinition.kt b/src/main/kotlin/com/rak/config/SelectorDefinition.kt index 0c01ba9..b20b9b0 100644 --- a/src/main/kotlin/com/rak/config/SelectorDefinition.kt +++ b/src/main/kotlin/com/rak/config/SelectorDefinition.kt @@ -1,5 +1,8 @@ package com.rak.config +import java.util.* + interface SelectorDefinition { fun steps(): Set + fun transform(): Optional> } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/TransformationStep.kt b/src/main/kotlin/com/rak/config/TransformationStep.kt new file mode 100644 index 0000000..c0d23a4 --- /dev/null +++ b/src/main/kotlin/com/rak/config/TransformationStep.kt @@ -0,0 +1,10 @@ +package com.rak.config + +import com.rak.config.converter.EmptyStringConverter +import io.smallrye.config.WithConverter + +interface TransformationStep { + fun name(): String + @WithConverter(EmptyStringConverter::class) + fun parameters(): MutableList +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/config/converter/EmptyStringConverter.kt b/src/main/kotlin/com/rak/config/converter/EmptyStringConverter.kt new file mode 100644 index 0000000..237fed8 --- /dev/null +++ b/src/main/kotlin/com/rak/config/converter/EmptyStringConverter.kt @@ -0,0 +1,11 @@ +package com.rak.config.converter + +import jakarta.annotation.Priority +import org.eclipse.microprofile.config.spi.Converter + +@Priority(1) +class EmptyStringConverter : Converter { + override fun convert(value: String): String { + return value + } +} \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt b/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt index c0fb037..bca8fd8 100644 --- a/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt +++ b/src/main/kotlin/com/rak/model/transform/ParameterizedTransformation.kt @@ -2,5 +2,5 @@ package com.rak.model.transform @FunctionalInterface fun interface ParameterizedTransformation : AbstractTransformation { - fun apply(input: Any, vararg parameters: Any): Any? + fun apply(input: String, vararg parameters: String): Any? } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/transform/Transformation.kt b/src/main/kotlin/com/rak/model/transform/Transformation.kt index 2d1051c..2b924ee 100644 --- a/src/main/kotlin/com/rak/model/transform/Transformation.kt +++ b/src/main/kotlin/com/rak/model/transform/Transformation.kt @@ -2,5 +2,5 @@ package com.rak.model.transform @FunctionalInterface fun interface Transformation : AbstractTransformation { - fun apply(input: Any): Any? + fun apply(input: String): String? } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt b/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt index ba0559d..1dfc2c5 100644 --- a/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt +++ b/src/main/kotlin/com/rak/model/transform/TransformationRegistry.kt @@ -1,27 +1,56 @@ package com.rak.model.transform -import jakarta.enterprise.context.ApplicationScoped +import java.util.concurrent.ConcurrentHashMap -@ApplicationScoped class TransformationRegistry { - private val transformations = hashMapOf Any>() + private val transformations: ConcurrentHashMap = ConcurrentHashMap() + private val parameterizedTransformation: ConcurrentHashMap = + ConcurrentHashMap() init { - register("trim") { - (it as String).trim() + register("trim") { it.trim() } + register("replace") { input, parameters -> + require(parameters.size == 2) { + "'replace' requires exactly 2 parameters" + } + input.replace(parameters[0], parameters[1]) + } + register("regexReplace") { input, params -> + require(params.size == 2) { + "'regexReplace' requires exactly 2 parameters" + } + input.replace(params[0].toRegex(), params[1]) } - register("replace", { s: Any -> - Integer. - }) } -// fun register(name: String, transformation: (input: Any) -> T) { -// transformations[name] = transformation -// } - fun register(name: String, transformation: Transformation) { + fun register(name: String, transformation: Transformation) { + transformations.put(name, transformation) + } + fun register(name: String, transformation: ParameterizedTransformation) { + parameterizedTransformation.put(name, transformation) + } + + fun getTransformation(name: String, parameters: List? = null): AbstractTransformation { + return when { + transformations.containsKey(name) -> { + if (!parameters.isNullOrEmpty()) { + throw IllegalArgumentException("'$name' doesn't accept parameters") + } else { + transformations[name]!! + } + } + parameterizedTransformation.containsKey(name) -> { + if (parameters.isNullOrEmpty()) { + throw IllegalArgumentException("'$name' requires parameters") + } else { + parameterizedTransformation[name]!! + } + } + else -> throw IllegalArgumentException("Unknown transformation: '$name'") + } } } \ No newline at end of file diff --git a/src/main/kotlin/com/rak/util/XPathUtil.kt b/src/main/kotlin/com/rak/util/XPathUtil.kt index adb663b..ae4c1a8 100644 --- a/src/main/kotlin/com/rak/util/XPathUtil.kt +++ b/src/main/kotlin/com/rak/util/XPathUtil.kt @@ -26,8 +26,8 @@ class XPathUtil private constructor() { .firstOrNull()?.text() } - fun getNextElement(root: Element, path: String): Element? { - return root.selectXpath(path).firstOrNull() + fun getNextElement(element: Element, path: String): Element? { + return element.selectXpath(path).firstOrNull() } fun extractResult(root: Element, path: String): String? { diff --git a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter index c2a4dbc..2658a71 100644 --- a/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter +++ b/src/main/resources/META-INF/services/org.eclipse.microprofile.config.spi.Converter @@ -1 +1 @@ -com.rak.config.converter.TypeSelectorConverter \ No newline at end of file +com.rak.config.converter.TypeSelectorConverter diff --git a/src/main/resources/application.yml b/src/main/resources/application.yml index 266bb17..0d322b7 100644 --- a/src/main/resources/application.yml +++ b/src/main/resources/application.yml @@ -36,6 +36,11 @@ scraper: steps: - type: xpath value: "//li/text()" + transform: + - name: "replace" + parameters: + - " (" + - "" language: steps: - type: xpath