SAMZA-1268: Javadoc cleanup for public APIs for 0.13 release
[samza.git] / samza-api / src / main / java / org / apache / samza / operators / MessageStream.java
1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19 package org.apache.samza.operators;
20
21 import org.apache.samza.annotation.InterfaceStability;
22 import org.apache.samza.operators.functions.FilterFunction;
23 import org.apache.samza.operators.functions.FlatMapFunction;
24 import org.apache.samza.operators.functions.JoinFunction;
25 import org.apache.samza.operators.functions.MapFunction;
26 import org.apache.samza.operators.functions.SinkFunction;
27 import org.apache.samza.operators.windows.Window;
28 import org.apache.samza.operators.windows.WindowPane;
29
30 import java.time.Duration;
31 import java.util.Collection;
32 import java.util.function.Function;
33
34
35 /**
36 * A stream of messages that can be transformed into another {@link MessageStream}.
37 * <p>
38 * A {@link MessageStream} corresponding to an input stream can be obtained using {@link StreamGraph#getInputStream}.
39 *
40 * @param <M> the type of messages in this stream
41 */
42 @InterfaceStability.Unstable
43 public interface MessageStream<M> {
44
45 /**
46 * Applies the provided 1:1 function to messages in this {@link MessageStream} and returns the
47 * transformed {@link MessageStream}.
48 *
49 * @param mapFn the function to transform a message to another message
50 * @param <OM> the type of messages in the transformed {@link MessageStream}
51 * @return the transformed {@link MessageStream}
52 */
53 <OM> MessageStream<OM> map(MapFunction<? super M, ? extends OM> mapFn);
54
55 /**
56 * Applies the provided 1:n function to transform a message in this {@link MessageStream}
57 * to n (zero or more) messages in the transformed {@link MessageStream}.
58 *
59 * @param flatMapFn the function to transform a message to zero or more messages
60 * @param <OM> the type of messages in the transformed {@link MessageStream}
61 * @return the transformed {@link MessageStream}
62 */
63 <OM> MessageStream<OM> flatMap(FlatMapFunction<? super M, ? extends OM> flatMapFn);
64
65 /**
66 * Applies the provided function to messages in this {@link MessageStream} and returns the
67 * filtered {@link MessageStream}.
68 * <p>
69 * The {@link FilterFunction} is a predicate which determines whether a message in this {@link MessageStream}
70 * should be retained in the filtered {@link MessageStream}.
71 *
72 * @param filterFn the predicate to filter messages from this {@link MessageStream}
73 * @return the filtered {@link MessageStream}
74 */
75 MessageStream<M> filter(FilterFunction<? super M> filterFn);
76
77 /**
78 * Allows sending messages in this {@link MessageStream} to an output system using the provided {@link SinkFunction}.
79 * <p>
80 * Offers more control over processing and sending messages than {@link #sendTo(OutputStream)} since
81 * the {@link SinkFunction} has access to the {@link org.apache.samza.task.MessageCollector} and
82 * {@link org.apache.samza.task.TaskCoordinator}.
83 * <p>
84 * This can also be used to send output to a system (e.g. a database) that doesn't have a corresponding
85 * Samza SystemProducer implementation.
86 *
87 * @param sinkFn the function to send messages in this stream to an external system
88 */
89 void sink(SinkFunction<? super M> sinkFn);
90
91 /**
92 * Allows sending messages in this {@link MessageStream} to an {@link OutputStream}.
93 *
94 * @param outputStream the output stream to send messages to
95 * @param <K> the type of key in the outgoing message
96 * @param <V> the type of message in the outgoing message
97 */
98 <K, V> void sendTo(OutputStream<K, V, M> outputStream);
99
100 /**
101 * Groups the messages in this {@link MessageStream} according to the provided {@link Window} semantics
102 * (e.g. tumbling, sliding or session windows) and returns the transformed {@link MessageStream} of
103 * {@link WindowPane}s.
104 * <p>
105 * Use the {@link org.apache.samza.operators.windows.Windows} helper methods to create the appropriate windows.
106 * <p>
107 * <b>Note:</b> As of version 0.13.0, messages in windows are kept in memory and may be lost in case of failures.
108 *
109 * @param window the window to group and process messages from this {@link MessageStream}
110 * @param <K> the type of key in the message in this {@link MessageStream}. If a key is specified,
111 * panes are emitted per-key.
112 * @param <WV> the type of value in the {@link WindowPane} in the transformed {@link MessageStream}
113 * @return the transformed {@link MessageStream}
114 */
115 <K, WV> MessageStream<WindowPane<K, WV>> window(Window<M, K, WV> window);
116
117 /**
118 * Joins this {@link MessageStream} with another {@link MessageStream} using the provided
119 * pairwise {@link JoinFunction}.
120 * <p>
121 * Messages in each stream are retained for the provided {@code ttl} and join results are
122 * emitted as matches are found.
123 * <p>
124 * <b>Note:</b> As of version 0.13.0, messages in joins are kept in memory and may be lost in case of failures.
125 *
126 * @param otherStream the other {@link MessageStream} to be joined with
127 * @param joinFn the function to join messages from this and the other {@link MessageStream}
128 * @param ttl the ttl for messages in each stream
129 * @param <K> the type of join key
130 * @param <JM> the type of messages in the other stream
131 * @param <OM> the type of messages resulting from the {@code joinFn}
132 * @return the joined {@link MessageStream}
133 */
134 <K, JM, OM> MessageStream<OM> join(MessageStream<JM> otherStream,
135 JoinFunction<? extends K, ? super M, ? super JM, ? extends OM> joinFn, Duration ttl);
136
137 /**
138 * Merges all {@code otherStreams} with this {@link MessageStream}.
139 *
140 * @param otherStreams other {@link MessageStream}s to be merged with this {@link MessageStream}
141 * @return the merged {@link MessageStream}
142 */
143 MessageStream<M> merge(Collection<? extends MessageStream<? extends M>> otherStreams);
144
145 /**
146 * Sends the messages of type {@code M} in this {@link MessageStream} to a repartitioned output stream and consumes
147 * them as an input {@link MessageStream} again. Uses keys returned by the {@code keyExtractor} as the partition key.
148 * <p>
149 * <b>Note:</b> Repartitioned streams are created automatically in the default system. The key and message Serdes
150 * configured for the default system must be able to serialize and deserialize types K and M respectively.
151 *
152 * @param keyExtractor the {@link Function} to extract the output message key and partition key from
153 * the input message
154 * @param <K> the type of output message key and partition key
155 * @return the repartitioned {@link MessageStream}
156 */
157 <K> MessageStream<M> partitionBy(Function<? super M, ? extends K> keyExtractor);
158
159 }